From 85589f8560934563156dbc3765e929094c5312ef Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Fri, 18 Oct 2019 16:08:34 +0200 Subject: [PATCH 001/171] scheduler merge, new prof events --- src/radical/pilot/agent/scheduler/base.py | 91 +++++++++++++++-------- 1 file changed, 59 insertions(+), 32 deletions(-) diff --git a/src/radical/pilot/agent/scheduler/base.py b/src/radical/pilot/agent/scheduler/base.py index 6b3168ff41..3067497b51 100644 --- a/src/radical/pilot/agent/scheduler/base.py +++ b/src/radical/pilot/agent/scheduler/base.py @@ -463,6 +463,8 @@ def _refresh_ts_map(self): if not self._waitpool: return + self._prof.prof('tsmap_start') + for uid,task in self._waitpool.iteritems(): ts = task['tuple_size'] if ts not in self._ts_map: @@ -470,6 +472,7 @@ def _refresh_ts_map(self): self._ts_map[ts].add(uid) self._ts_valid = True + self._prof.prof('tsmap_stop') # -------------------------------------------------------------------------- @@ -580,7 +583,7 @@ def _schedule_units(self): resources = True # fresh start, all is free while not self._proc_term.is_set(): - self._log.debug('=== schedule units 0: %s, w: %d', resources, + self._log.debug('=== schedule units : %s, w: %d', resources, len(self._waitpool)) active = 0 # see if we do anything in this iteration @@ -617,8 +620,6 @@ def _schedule_units(self): if not active: time.sleep(0.1) # FIXME: configurable - self._log.debug('=== schedule units x: %s %s', resources, active) - # -------------------------------------------------------------------------- # @@ -650,6 +651,9 @@ def _schedule_waitpool(self): on_skip=self._prof_sched_skip, log=self._log) + for task in scheduled: + self._prof.prof('schedule_wait', uid=task['uid']) + self._waitpool = {task['uid']:task for task in unscheduled} self.advance(scheduled, rps.AGENT_EXECUTING_PENDING, publish=True, push=True) @@ -703,6 +707,7 @@ def _schedule_incoming(self): # task got scheduled - advance state, notify world about the # state change, and push it out toward the next component. + self._prof.prof('schedule_first', uid=unit['uid']) self.advance(unit, rps.AGENT_EXECUTING_PENDING, publish=True, push=True) @@ -718,8 +723,7 @@ def _schedule_incoming(self): # if units remain waiting, we are out of usable resources resources = not bool(to_wait) - # incoming units which have to wait are the only reason to rebuild the - # tuple_size map + # new waiting units are a reason to rebuild the tuple_size map self._ts_valid = False self.slot_status("after schedule incoming") @@ -756,29 +760,51 @@ def _unschedule_completed(self): # immediately. This assumes that the `tuple_size` is good enough to # judge the legality of the resources for the new target unit. - ## ts = tuple(unit['tuple_size']) - ## if self._ts_map.get(ts): - ## - ## replace = self._waitpool[self._ts_map[ts].pop()] - ## replace['slots'] = unit['slots'] - ## placed.append(placed) - ## - ## # unschedule unit A and schedule unit B have the same - ## # timestamp - ## ts = time.time() - ## self._prof.prof('unschedule_stop', uid=unit['uid'], - ## timestamp=ts) - ## self._prof.prof('schedule_fast', uid=replace['uid'], - ## timestamp=ts) - ## self.advance(replace, rps.AGENT_EXECUTING_PENDING, - ## publish=True, push=True) - ## else: - ## - ## # no replacement unit found: free the slots, and try to - ## # schedule other units of other sizes. 
- ## to_release.append(unit) - - to_release.append(unit) + ts = tuple(unit['tuple_size']) + if not self._ts_map.get(ts): + + # no candidates with matching tuple sizes + to_release.append(unit) + + else: + + # cycle through the matching ts-candidates. Some + # may be invalid by now, having been scheduled via + # `schedule_waitlist`, but if we find any, break the + # search and swap the slots. + replace = None + while not replace: + + # stop search on emptied candidate list + if not self._ts_map[ts]: + del(self._ts_map[ts]) + break + + candidate = self._ts_map[ts].pop() + replace = self._waitpool.get(candidate) + + if not replace: + + # no replacement unit found: free the slots, and try to + # schedule other units of other sizes. + to_release.append(unit) + + else: + + # found one - swap the slots and push out to executor + replace['slots'] = unit['slots'] + placed.append(placed) + + # unschedule unit A and schedule unit B have the same + # timestamp + ts = time.time() + self._prof.prof('unschedule_stop', uid=unit['uid'], + timestamp=ts) + self._prof.prof('schedule_fast', uid=replace['uid'], + timestamp=ts) + self.advance(replace, rps.AGENT_EXECUTING_PENDING, + publish=True, push=True) + if not to_release: if not to_unschedule: @@ -795,10 +821,11 @@ def _unschedule_completed(self): self.unschedule_unit(unit) self._prof.prof('unschedule_stop', uid=unit['uid']) - # we placed some previously waiting units, and need to remove those from - # the waitpool - self._waitpool = {task['uid']:task for task in self._waitpool.values() - if task['uid'] not in placed} + # if previously waiting units were placed, remove them from the waitpool + if placed: + self._waitpool = {task['uid'] : task + for task in self._waitpool.values() + if task['uid'] not in placed} # we have new resources, and were active return True, True From 3689eed7701b71d3d7270ea93ce245a79f6a2f11 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 9 Dec 2019 17:45:09 +0100 Subject: [PATCH 002/171] fix coversion call --- src/radical/pilot/configs/agent_debug_sa.json | 110 ++++++++++++++++++ src/radical/pilot/configs/pmgr_default.json | 9 +- src/radical/pilot/configs/resource_debug.json | 51 ++------ .../pilot/configs/session_default.json | 10 +- src/radical/pilot/configs/umgr_default.json | 29 +++-- src/radical/pilot/utils/misc.py | 2 +- 6 files changed, 145 insertions(+), 66 deletions(-) create mode 100644 src/radical/pilot/configs/agent_debug_sa.json diff --git a/src/radical/pilot/configs/agent_debug_sa.json b/src/radical/pilot/configs/agent_debug_sa.json new file mode 100644 index 0000000000..b07a1431b7 --- /dev/null +++ b/src/radical/pilot/configs/agent_debug_sa.json @@ -0,0 +1,110 @@ + +{ + "staging_area" : "staging_area", + "staging_schema" : "staging", + "max_io_loglength" : 1024, + "db_poll_sleeptime" : 5.0, + + "bulk_time" : 10.0, + "bulk_size" : 4096, + + "heartbeat" : { + "interval" : 10.0, + "timeout" : 30.0 + }, + + "target" : "local", + "bridges" : { + "agent_staging_input_queue" : { "kind" : "queue", + "log_level" : "error", + "stall_hwm" : 1, + "bulk_size" : 1024}, + "agent_scheduling_queue" : { "kind" : "queue", + "log_level" : "error", + "stall_hwm" : 1, + "bulk_size" : 1024}, + "agent_executing_queue" : { "kind" : "queue", + "log_level" : "error", + "stall_hwm" : 1, + "bulk_size" : 1024}, + "agent_staging_output_queue" : { "kind" : "queue", + "log_level" : "error", + "stall_hwm" : 1, + "bulk_size" : 1024}, + + "funcs_wrk_queue" : { "kind" : "queue", + "log_level" : "error", + "stall_hwm" : 1, + "bulk_size" : 0}, + 
"funcs_res_queue" : { "kind" : "queue", + "log_level" : "error", + "stall_hwm" : 1, + "bulk_size" : 0}, + + "agent_unschedule_pubsub" : { "kind" : "pubsub", + "log_level" : "error", + "stall_hwm" : 1, + "bulk_size" : 1024}, + "agent_schedule_pubsub" : { "kind" : "pubsub", + "log_level" : "error", + "stall_hwm" : 1, + "bulk_size" : 1024}, + + "control_pubsub" : { "kind" : "pubsub", + "log_level" : "error", + "stall_hwm" : 1, + "bulk_size" : 1024}, + "state_pubsub" : { "kind" : "pubsub", + "log_level" : "error", + "stall_hwm" : 1, + "bulk_size" : 1024}, + "log_pubsub" : { "kind" : "pubsub", + "log_level" : "error", + "stall_hwm" : 1, + "bulk_size" : 1024} + }, + + "components" : { + "update" : {"count" : 1}, + "agent_staging_input" : {"count" : 4}, + "agent_scheduling" : {"count" : 1}, + "agent_executing" : {"count" : 4}, + "agent_staging_output" : {"count" : 4} + # }, + # + # "agents": { + # "agent.1": { + # "target": "node", + # "components": { + # "agent_staging_input" : {"count" : 1}, + # "agent_executing" : {"count" : 1}, + # "agent_staging_output" : {"count" : 1} + # } + # }, + # "agent.2": { + # "target": "node", + # "components": { + # "agent_staging_input" : {"count" : 1}, + # "agent_executing" : {"count" : 1}, + # "agent_staging_output" : {"count" : 1} + # } + # }, + # "agent_3": { + # "target": "node", + # "components": { + # "agent_staging_input" : {"count" : 1}, + # "agent_executing" : {"count" : 1}, + # "agent_staging_output" : {"count" : 1} + # } + # }, + # "agent_4": { + # "target": "node", + # "components": { + # "agent_staging_input" : {"count" : 1}, + # "agent_executing" : {"count" : 1}, + # "agent_staging_output" : {"count" : 1} + # } + # } + } +} + diff --git a/src/radical/pilot/configs/pmgr_default.json b/src/radical/pilot/configs/pmgr_default.json index c4dbf59a16..f26317ce02 100644 --- a/src/radical/pilot/configs/pmgr_default.json +++ b/src/radical/pilot/configs/pmgr_default.json @@ -4,13 +4,18 @@ # a functional pilot manager. 
{ # time to sleep between database polls (seconds) - "db_poll_sleeptime" : 10.0, + "db_poll_sleeptime" : 60.0, + + "heartbeat" : { + "interval" : 3.0, + "timeout" : 10.0 + }, "bridges" : { "pmgr_launching_queue" : {"kind" : "queue", "log_level" : "error", "stall_hwm" : 1, - "bulk_size" : 0} + "bulk_size" : 1024} }, "components" : { diff --git a/src/radical/pilot/configs/resource_debug.json b/src/radical/pilot/configs/resource_debug.json index b1641cd05e..ee13466220 100644 --- a/src/radical/pilot/configs/resource_debug.json +++ b/src/radical/pilot/configs/resource_debug.json @@ -1,6 +1,6 @@ { - "local": { + "summit": { "description" : "", "notes" : "", "schemas" : ["local"], @@ -12,59 +12,24 @@ "default_queue" : "", "lrms" : "FORK", "lfs_per_node" : "/tmp", - "agent_config" : "default_sa", + "agent_config" : "debug_sa", "agent_scheduler" : "CONTINUOUS", - "agent_spawner" : "POPEN", + "agent_spawner" : "SLEEP", "agent_launch_method" : "FORK", - "task_launch_method" : "FORK", - "mpi_launch_method" : "MPIRUN", + "task_launch_method" : "NOOP", + "mpi_launch_method" : "NOOP", "pre_bootstrap_0" : [], "pre_bootstrap_1" : [], "default_remote_workdir" : "$HOME/", "valid_roots" : ["/"], "rp_version" : "installed", "virtenv_mode" : "use", - "virtenv" : "/home/merzky/radical/radical.pilot.3/ve3/", - "stage_cacerts" : true, - "python_dist" : "default", - "virtenv_dist" : "system", - "cores_per_node" : 16, - "gpus_per_node" : 4, - "lfs_per_node" : {"path" : "/tmp", - "size" : 100} - }, - - "summit": { - "description" : "", - "notes" : "", - "schemas" : ["local"], - "local" : { - "job_manager_hop" : "fork://localhost/", - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" - }, - "default_queue" : "", - "lrms" : "DEBUG", - "lfs_per_node" : "/tmp", - "agent_config" : "default", - "agent_scheduler" : "CONTINUOUS_SUMMIT", - "agent_spawner" : "SHELLFS", - "agent_launch_method" : "FORK", - "task_launch_method" : "FORK", - "mpi_launch_method" : "FORK", - "pre_bootstrap_0" : [ ], - "pre_bootstrap_1" : [ ], - "default_remote_workdir" : "$HOME/", - "valid_roots" : ["/"], - "rp_version" : "installed", - "virtenv_mode" : "use", - "virtenv" : "/home/merzky/radical/radical.pilot.test/ve/", + "virtenv" : "/home/merzky/radical/radical.pilot.test/ve3/", "stage_cacerts" : true, "python_dist" : "default", "virtenv_dist" : "system", - "cores_per_socket" : 21, - "gpus_per_socket" : 3, - "sockets_per_node" : 2, + "cores_per_node" : 42, + "gpus_per_node" : 6, "lfs_per_node" : {"path" : "/tmp", "size" : 100} } diff --git a/src/radical/pilot/configs/session_default.json b/src/radical/pilot/configs/session_default.json index a82a896c74..3ba7657cfc 100644 --- a/src/radical/pilot/configs/session_default.json +++ b/src/radical/pilot/configs/session_default.json @@ -11,23 +11,23 @@ "bulk_size" : 1024, "heartbeat" : { - "interval" : 0.5, - "timeout" : 1.0 + "interval" : 3.0, + "timeout" : 10.0 }, "bridges" : { "log_pubsub" : {"kind" : "pubsub", "log_level" : "error", "stall_hwm" : 1, - "bulk_size" : 0}, + "bulk_size" : 1024}, "state_pubsub" : {"kind" : "pubsub", "log_level" : "error", "stall_hwm" : 1, - "bulk_size" : 0}, + "bulk_size" : 1024}, "control_pubsub" : {"kind" : "pubsub", "log_level" : "error", "stall_hwm" : 1, - "bulk_size" : 0} + "bulk_size" : 1024} }, "components" : { diff --git a/src/radical/pilot/configs/umgr_default.json b/src/radical/pilot/configs/umgr_default.json index a784c883f6..3bd8e09c69 100644 --- a/src/radical/pilot/configs/umgr_default.json +++ 
b/src/radical/pilot/configs/umgr_default.json @@ -3,47 +3,46 @@ # specified. It contains the minimal set of settings required for # a functional unit manager. { - # default scheduler "scheduler" : "round_robin", + "bulk_size" : 4096, + "bulk_time" : 10.0, - # max number of updates to put into a db bulk - "bulk_collection_size" : 100, + "db_poll_sleeptime" : 10.0, - # max time period to collect db notifications into bulks (seconds) - "bulk_collection_time" : 1.0, - - # time to sleep between database polls (seconds) - "db_poll_sleeptime" : 1.0, + "heartbeat" : { + "interval" : 3.0, + "timeout" : 10.0 + }, "bridges" : { "umgr_staging_input_queue" : {"kind" : "queue", "log_level" : "error", "stall_hwm" : 1, - "bulk_size" : 0}, + "bulk_size" : 1024}, "umgr_scheduling_queue" : {"kind" : "queue", "log_level" : "error", "stall_hwm" : 1, - "bulk_size" : 0}, + "bulk_size" : 1024}, "umgr_staging_output_queue" : {"kind" : "queue", "log_level" : "error", "stall_hwm" : 1, - "bulk_size" : 0}, + "bulk_size" : 1024}, "umgr_unschedule_pubsub" : {"kind" : "pubsub", "log_level" : "error", "stall_hwm" : 1, - "bulk_size" : 0}, + "bulk_size" : 1024}, "umgr_reschedule_pubsub" : {"kind" : "pubsub", "log_level" : "error", "stall_hwm" : 1, - "bulk_size" : 0} + "bulk_size" : 1024} }, "components" : { # how many instances of the respective components should be started - "umgr_staging_input" : { "count" : 1}, + "umgr_staging_input" : { "count" : 4}, "umgr_scheduling" : { "count" : 1}, - "umgr_staging_output" : { "count" : 1} + "umgr_staging_output" : { "count" : 4} } } diff --git a/src/radical/pilot/utils/misc.py b/src/radical/pilot/utils/misc.py index dabee3d5cd..e7017f39aa 100644 --- a/src/radical/pilot/utils/misc.py +++ b/src/radical/pilot/utils/misc.py @@ -89,7 +89,7 @@ def write_dir(path): cksum = 256 + sum(ord(h) for h in data) data = rpad(data , 512) data = data [:-364] + '%06o\0' % cksum + data[-357:] - fout.write(ru.to_byte(data)) + fout.write(ru.as_bytes(data)) for dname in dnames: write_dir(dname) From 41902c0d600dbdbf799f579e16053092050edfd7 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 13 Apr 2020 11:21:00 +0200 Subject: [PATCH 003/171] remove obsolete file --- src/radical/pilot/utils/pubsub.py | 399 ------------------------------ 1 file changed, 399 deletions(-) delete mode 100644 src/radical/pilot/utils/pubsub.py diff --git a/src/radical/pilot/utils/pubsub.py b/src/radical/pilot/utils/pubsub.py deleted file mode 100644 index 20e6f01554..0000000000 --- a/src/radical/pilot/utils/pubsub.py +++ /dev/null @@ -1,399 +0,0 @@ - -import os -import sys -import zmq -import copy -import math -import time -import errno -import queue -import pprint -import msgpack - -import setproctitle as spt -import threading as mt -import multiprocessing as mp -import threading as mt - -import radical.utils as ru - -from .misc import hostip as rpu_hostip - - -# -------------------------------------------------------------------------- -# defines for pubsub roles -# -PUBSUB_PUB = 'pub' -PUBSUB_SUB = 'sub' -PUBSUB_BRIDGE = 'bridge' -PUBSUB_ROLES = [PUBSUB_PUB, PUBSUB_SUB, PUBSUB_BRIDGE] -PUBSUB_TSTART = 1.0 # time needed to bootstrap a publisher - -_USE_MULTIPART = True # send [topic, data] as multipart message -_BRIDGE_TIMEOUT = 5 # how long to wait for bridge startup -_LINGER_TIMEOUT = 250 # ms to linger after close -_HIGH_WATER_MARK = 0 # number of messages to buffer before dropping - - -# -------------------------------------------------------------------------- -# -# zmq will (rightly) barf at interrupted system calls. 
We are able to rerun -# those calls. -# -# FIXME: how does that behave wrt. tomeouts? We probably should include -# an explicit timeout parameter. -# -# kudos: https://gist.github.com/minrk/5258909 -# -def _uninterruptible(f, *args, **kwargs): - cnt = 0 - while True: - cnt += 1 - try: - return f(*args, **kwargs) - except zmq.ZMQError as e: - if e.errno == errno.EINTR: - if cnt > 10: - raise - # interrupted, try again - continue - else: - # real error, raise it - raise - - -# ------------------------------------------------------------------------------ -# -# Notifications between components are based on pubsub channels. Those channels -# have different scope (bound to the channel name). Only one specific topic is -# predefined: 'state' will be used for unit state updates. -# -class Pubsub(ru.Process): - - def __init__(self, session, channel, role, cfg, addr=None): - """ - Addresses are of the form 'tcp://host:port'. Both 'host' and 'port' can - be wildcards for BRIDGE roles -- the bridge will report the in and out - addresses as obj.addr_in and obj.addr_out. - """ - - self._session = session - self._channel = channel - self._role = role - self._cfg = copy.deepcopy(cfg) - self._addr = addr - self._lock = mt.Lock() - self._tstart = time.time() - - assert(self._role in PUBSUB_ROLES), 'invalid role %s' % self._role - - self._uid = "%s.%s" % (self._channel.replace('_', '.'), self._role) - self._uid = ru.generate_id(self._uid) - self._log = self._session._get_logger(name=self._uid, - level=self._cfg.get('log_level')) - - # avoid superfluous logging calls in critical code sections - if self._log.getEffectiveLevel() == 10: # logging.DEBUG: - self._debug = True - else: - self._debug = False - - self._addr_in = None # bridge input addr - self._addr_out = None # bridge output addr - - self._q = None - self._in = None - self._out = None - self._ctx = None - - if not self._addr: - self._addr = 'tcp://*:*' - - self._log.info("create %s - %s - %s", self._channel, self._role, self._addr) - - super(Pubsub, self).__init__(name=self._uid, log=self._log) - - - # ---------------------------------------------------------------------- - # behavior depends on the role... - if self._role == PUBSUB_PUB: - - self._ctx = zmq.Context() - self._session._to_destroy.append(self._ctx) - - self._q = self._ctx.socket(zmq.PUB) - self._q.linger = _LINGER_TIMEOUT - self._q.hwm = _HIGH_WATER_MARK - self._q.connect(self._addr) - self.start(spawn=False) - - - # ---------------------------------------------------------------------- - elif self._role == PUBSUB_BRIDGE: - - # we expect bridges to always use a port wildcard. Make sure - # that's the case - elems = self._addr.split(':') - if len(elems) > 2 and elems[2] and elems[2] != '*': - raise RuntimeError('wildcard port (*) required for bridge addresses (%s)' \ - % self._addr) - - self._pqueue = mp.Queue() - self.start() - - try: - [addr_in, addr_out] = self._pqueue.get(True, _BRIDGE_TIMEOUT) - - # store addresses - self._addr_in = ru.Url(addr_in) - self._addr_out = ru.Url(addr_out) - - # use the local hostip for bridge addresses - self._addr_in.host = rpu_hostip() - self._addr_out.host = rpu_hostip() - - except queue.Empty as e: - raise RuntimeError ("bridge did not come up! 
(%s)" % e) - - - # ---------------------------------------------------------------------- - elif self._role == PUBSUB_SUB: - - self._ctx = zmq.Context() - self._session._to_destroy.append(self._ctx) - - self._q = self._ctx.socket(zmq.SUB) - self._q.linger = _LINGER_TIMEOUT - self._q.hwm = _HIGH_WATER_MARK - self._q.connect(self._addr) - self.start(spawn=False) - - - # -------------------------------------------------------------------------- - # - @property - def name(self): - return self._uid - - @property - def uid(self): - return self._uid - - @property - def channel(self): - return self._channel - - @property - def role(self): - return self._role - - @property - def addr(self): - return self._addr - - @property - def addr_in(self): - assert(self._role == PUBSUB_BRIDGE), 'addr_in only set on bridges' - return self._addr_in - - @property - def addr_out(self): - assert(self._role == PUBSUB_BRIDGE), 'addr_out only set on bridges' - return self._addr_out - - - # -------------------------------------------------------------------------- - # - def ru_initialize_child(self): - - assert(self._role == PUBSUB_BRIDGE), 'only bridges can be started' - - self._uid = self._uid + '.child' - self._log = self._session._get_logger(name=self._uid, - level=self._cfg.get('log_level')) - - spt.setproctitle('rp.%s' % self._uid) - self._log.info('start bridge %s on %s', self._uid, self._addr) - - self._ctx = zmq.Context() - self._session._to_destroy.append(self._ctx) - - self._in = self._ctx.socket(zmq.XSUB) - self._in.linger = _LINGER_TIMEOUT - self._in.hwm = _HIGH_WATER_MARK - self._in.bind(self._addr) - - self._out = self._ctx.socket(zmq.XPUB) - self._out.linger = _LINGER_TIMEOUT - self._out.hwm = _HIGH_WATER_MARK - self._out.bind(self._addr) - - # start polling for messages - self._poll = zmq.Poller() - self._poll.register(self._in, zmq.POLLIN) - self._poll.register(self._out, zmq.POLLIN) - - # communicate the bridge ports to the parent process - _addr_in = ru.to_string(self._in.getsockopt( zmq.LAST_ENDPOINT)) - _addr_out = ru.to_string(self._out.getsockopt(zmq.LAST_ENDPOINT)) - - - self._pqueue.put([_addr_in, _addr_out]) - - self._log.info('bound bridge %s to %s : %s', self._uid, _addr_in, _addr_out) - - - # -------------------------------------------------------------------------- - # - def ru_finalize_common(self): - - if self._q : self._q .close() - if self._in : self._in .close() - if self._out : self._out.close() - if self._ctx : self._ctx.destroy() - pass - - - # -------------------------------------------------------------------------- - # - def work_cb(self): - - self._log.debug('=== work') - with self._lock: - _socks = dict(_uninterruptible(self._poll.poll, timeout=1000)) # timeout in ms - - if self._in in _socks: - - # if any incoming socket signals a message, get the - # message on the subscriber channel, and forward it - # to the publishing channel, no questions asked. - if _USE_MULTIPART: - with self._lock: - msg = _uninterruptible(self._in.recv_multipart, flags=zmq.NOBLOCK) - _uninterruptible(self._out.send_multipart, msg) - else: - msg = _uninterruptible(self._in.recv, flags=zmq.NOBLOCK) - _uninterruptible(self._out.send, msg) - - if self._debug: - self._log.debug("-> %s", pprint.pformat(msg)) - - - if self._out in _socks: - - # if any outgoing socket signals a message, it's - # likely a topic subscription. We forward that on - # the incoming channels to subscribe for the - # respective messages. 
- if _USE_MULTIPART: - with self._lock: - msg = _uninterruptible(self._out.recv_multipart) - _uninterruptible(self._in.send_multipart, msg) - else: - msg = _uninterruptible(self._out.recv) - _uninterruptible(self._in.send, msg) - - if self._debug: - self._log.debug("<- %s", pprint.pformat(msg)) - - return True - - - # -------------------------------------------------------------------------- - # - def subscribe(self, topic): - - assert(self._role == PUBSUB_SUB), 'incorrect role on subscribe' - - topic = ru.to_byte(topic.replace(' ', '_')) - - self._log.debug("~~ %s", topic) - - with self._lock: - _uninterruptible(self._q.setsockopt, zmq.SUBSCRIBE, topic) - - - # -------------------------------------------------------------------------- - # - def put(self, topic, msg): - - assert(self._role == PUBSUB_PUB), 'incorrect role on put' - assert(isinstance(msg,dict)), 'invalide message type' - - # ensure the publisher is old enough to not loose messages - # see 'slow joiner' in http://zguide.zeromq.org/page:all - diff = time.time() - self._tstart - if diff < PUBSUB_TSTART: - time.sleep(PUBSUB_TSTART - diff) - - self._log.debug("?> %s", pprint.pformat(msg)) - - topic = ru.to_byte(topic.replace(' ', '_')) - data = msgpack.packb(msg) - - if self._debug: - self._log.debug("-> %s %s", topic, pprint.pformat(msg)) - - if _USE_MULTIPART: - with self._lock: - _uninterruptible(self._q.send_multipart, [topic, data]) - - else: - if self._debug: - self._log.debug("-> %s %s", topic, pprint.pformat(msg)) - _uninterruptible(self._q.send, "%s %s" % (topic, data)) - - - # -------------------------------------------------------------------------- - # - def get(self): - - assert(self._role == PUBSUB_SUB), 'invalid role on get' - - # FIXME: add timeout to allow for graceful termination - - if _USE_MULTIPART: - with self._lock: - topic, data = _uninterruptible(self._q.recv_multipart) - - else: - raw = _uninterruptible(self._q.recv) - topic, data = raw.split(' ', 1) - - msg = msgpack.unpackb(data, raw=False) # we want non-byte types back - - if self._debug: - self._log.debug("<- %s", ([topic, pprint.pformat(msg)])) - - return [topic, msg] - - - # -------------------------------------------------------------------------- - # - def get_nowait(self, timeout=None): # timeout in ms - - assert(self._role == PUBSUB_SUB), 'invalid role on get_nowait' - - if _uninterruptible(self._q.poll, flags=zmq.POLLIN, timeout=timeout): - - if _USE_MULTIPART: - with self._lock: - topic, data = _uninterruptible(self._q.recv_multipart, - flags=zmq.NOBLOCK) - else: - raw = _uninterruptible(self._q.recv) - topic, data = raw.split(' ', 1) - - msg = msgpack.unpackb(data, raw=False) - - if self._debug: - self._log.debug("<< %s", ([topic, pprint.pformat(msg)])) - - return [topic, msg] - - else: - return [None, None] - - -# ------------------------------------------------------------------------------ - From ff4a7b9b0353adbda9a971ac4a68cd9e07d9209a Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Fri, 5 Feb 2021 10:54:16 +0100 Subject: [PATCH 004/171] minor cleanup --- bin/radical-pilot-component | 4 +++- src/radical/pilot/agent/agent_0.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/radical-pilot-component b/bin/radical-pilot-component index 226230e8c9..241bab53db 100755 --- a/bin/radical-pilot-component +++ b/bin/radical-pilot-component @@ -66,6 +66,8 @@ def main(cfg): prof.prof('comp_stop', uid=cfg.uid) +# ------------------------------------------------------------------------------ +# def wrapped_main(cfg, log, 
prof): term = mt.Event() @@ -80,7 +82,7 @@ def wrapped_main(cfg, log, prof): comp.start() # component runs - send heartbeats so that cmgr knows about it - hb_pub = ru.zmq.Publisher('heartbeat', cfg.heartbeat.addr_pub) #, log=log) + hb_pub = ru.zmq.Publisher('heartbeat', cfg.heartbeat.addr_pub) # , log=log) def hb_beat_cb(): hb_pub.put('heartbeat', msg={'uid': cfg.uid}) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 4d76bd71e2..48d4253f78 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -89,7 +89,7 @@ def __init__(self, cfg, session): # run our own slow-paced heartbeat monitor to watch pmgr heartbeats # FIXME: we need to get pmgr freq - freq = 10 + freq = 100 tint = freq / 3 tout = freq * 3 self._hb = ru.Heartbeat(uid=self._pid, From 0f064b197fce5aa2c1722ef1e6d81cda1112fc5e Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sun, 7 Feb 2021 10:32:52 +0100 Subject: [PATCH 005/171] prepare for merge --- src/radical/pilot/agent/executing/sleep.py | 10 +++-- src/radical/pilot/agent/scheduler/base.py | 49 +++++++++++----------- src/radical/pilot/utils/component.py | 27 ++++++------ src/radical/pilot/utils/prof_utils.py | 2 + 4 files changed, 46 insertions(+), 42 deletions(-) diff --git a/src/radical/pilot/agent/executing/sleep.py b/src/radical/pilot/agent/executing/sleep.py index 1d0e694750..c1b63b5119 100644 --- a/src/radical/pilot/agent/executing/sleep.py +++ b/src/radical/pilot/agent/executing/sleep.py @@ -39,10 +39,10 @@ def initialize(self): self.register_output(rps.AGENT_STAGING_OUTPUT_PENDING, rpc.AGENT_STAGING_OUTPUT_QUEUE) - self.register_publisher (rpc.AGENT_UNSCHEDULE_PUBSUB) + self.register_publisher(rpc.AGENT_UNSCHEDULE_PUBSUB) self._terminate = mt.Event() - self._tasks_lock = ru.RLock() + self._tasks_lock = mt.RLock() self._tasks = list() self._delay = 0.1 @@ -91,13 +91,15 @@ def _timed(self): while not self._terminate.is_set(): - time.sleep(self._delay) - with self._tasks_lock: now = time.time() to_finish = [t for t in self._tasks if t['to_finish'] <= now] self._tasks = [t for t in self._tasks if t['to_finish'] > now] + # if not to_finish: + time.sleep(self._delay) + # continue + for t in to_finish: uid = t['uid'] t['target_state'] = 'DONE' diff --git a/src/radical/pilot/agent/scheduler/base.py b/src/radical/pilot/agent/scheduler/base.py index 1aadfa5daf..8eded672fa 100644 --- a/src/radical/pilot/agent/scheduler/base.py +++ b/src/radical/pilot/agent/scheduler/base.py @@ -476,7 +476,7 @@ def _refresh_ts_map(self): if not self._waitpool: return - self._prof.prof('tsmap_start') + # self._prof.prof('tsmap_start') for uid,task in self._waitpool.items(): ts = task['tuple_size'] @@ -485,7 +485,7 @@ def _refresh_ts_map(self): self._ts_map[ts].add(uid) self._ts_valid = True - self._prof.prof('tsmap_stop') + # self._prof.prof('tsmap_stop') # -------------------------------------------------------------------------- @@ -630,7 +630,7 @@ def _schedule_tasks(self): # self._log.debug('=== schedule tasks c: %s %s', r, a) if not active: - time.sleep(0.1) # FIXME: configurable + time.sleep(0.01) # FIXME: configurable # self._log.debug('=== schedule tasks x: %s %s', resources, active) @@ -657,8 +657,8 @@ def _schedule_waitpool(self): # tasks = list(self._waitpool.values()) tasks.sort(key=lambda x: - (x['tuple_size'][0] + x['tuple_size'][2]) * x['tuple_size'][1], - reverse=True) + (x['tuple_size'][0] + x['tuple_size'][2]) * x['tuple_size'][1], + reverse=True) # cycle through waitpool, and see if we get anything 
placed now. scheduled, unscheduled = ru.lazy_bisect(tasks, @@ -666,8 +666,8 @@ def _schedule_waitpool(self): on_skip=self._prof_sched_skip, log=self._log) - for task in scheduled: - self._prof.prof('schedule_wait', uid=task['uid']) + # for task in scheduled: + # self._prof.prof('schedule_wait', uid=task['uid']) self._waitpool = {task['uid']:task for task in unscheduled} @@ -731,7 +731,7 @@ def _schedule_incoming(self): # task got scheduled - advance state, notify world about the # state change, and push it out toward the next component. - self._prof.prof('schedule_first', uid=task['uid']) + # self._prof.prof('schedule_first', uid=task['uid']) td = task['description'] task['$set'] = ['resources'] task['resources'] = {'cpu': td['cpu_processes'] * @@ -840,40 +840,41 @@ def _unschedule_completed(self): # found one - swap the slots and push out to executor replace['slots'] = task['slots'] - placed.append(placed) + placed.append(replace['uid']) # unschedule task A and schedule task B have the same # timestamp ts = time.time() self._prof.prof('unschedule_stop', uid=task['uid'], ts=ts) - self._prof.prof('schedule_fast', uid=replace['uid'], ts=ts) + # self._prof.prof('schedule_fast', uid=replace['uid'], ts=ts) + self._prof.prof('schedule_ok', uid=replace['uid'], ts=ts) self.advance(replace, rps.AGENT_EXECUTING_PENDING, publish=True, push=True) - to_release.append(task) - - if not to_release: - if not to_unschedule: - # no new resources, not been active - return False, False - else: - # no new resources, but activity - return False, True # we have tasks to unschedule, which will free some resources. We can # thus try to schedule larger tasks again, and also inform the caller # about resource availability. for task in to_release: self.unschedule_task(task) - self._prof.prof('unschedule_stop', uid=task['uid']) # if previously waiting tasks were placed, remove them from the waitpool if placed: - self._waitpool = {task['uid'] : task - for task in self._waitpool.values() - if task['uid'] not in placed} + for uid in placed: + del(self._waitpool[uid]) + + if to_release: + # new resources, activity + return True, True + elif to_unschedule: + # no new resources, but activity + return False, True + else: + # no new resources, no activity + return False, False + + # we have new resources, and were active - return True, True # -------------------------------------------------------------------------- diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index d0d1b037f9..b668af1811 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -467,7 +467,7 @@ def start(self): if not self._thread.is_alive(): raise RuntimeError('worker thread died during initialization') - time.sleep(0.1) + time.sleep(0.01) assert(self._thread.is_alive()) @@ -929,7 +929,7 @@ def run(self): if self._timeout and \ self._timeout > (time.time() - self._last): # not yet - time.sleep(0.1) # FIXME: make configurable + time.sleep(0.01) # FIXME: make configurable continue with self._cb_lock: @@ -1044,7 +1044,7 @@ def work_cb(self): # if no action occurs in this iteration, idle if not self._inputs: - time.sleep(0.1) + time.sleep(0.01) return True for name in self._inputs: @@ -1055,8 +1055,7 @@ def work_cb(self): # FIXME: a simple, 1-thing caching mechanism would likely # remove the req/res overhead completely (for any # non-trivial worker). 
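                #        One possible shape for that cache -- a sketch,
                #        assuming the wrapped getter returns a list of
                #        things or None:
                #
                #            class CachedInput(object):
                #
                #                def __init__(self, getter):
                #                    self._getter = getter
                #                    self._cache  = None
                #
                #                def get_nowait(self, timeout):
                #                    if self._cache:
                #                        ret, self._cache = self._cache, None
                #                        return ret
                #                    return self._getter(timeout)
                #
                #                def unget(self, things):
                #                    # keep unconsumed things for next call
                #                    self._cache = things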
- things = input.get_nowait(500) # in microseconds - things = ru.as_list(things) + things = input.get_nowait(200) # in microseconds if not things: return True @@ -1065,10 +1064,10 @@ def work_cb(self): # need to sort the things into buckets by state before # pushing them buckets = dict() - for thing in things: + for thing in ru.as_list(things): state = thing.get('state') # can be stateless uid = thing.get('uid') # and not have uids - self._prof.prof('get', uid=uid, state=state) + # self._prof.prof('get', uid=uid, state=state) if state not in buckets: buckets[state] = list() @@ -1239,16 +1238,16 @@ def advance(self, things, state=None, publish=True, push=False, ts=None, # unknown target state -- error for thing in _things: self._log.debug("lost %s [%s]", thing['uid'], _state) - self._prof.prof('lost', uid=thing['uid'], state=_state, - ts=ts) + # self._prof.prof('lost', uid=thing['uid'], state=_state, + # ts=ts) continue if not self._outputs[_state]: # empty output -- drop thing for thing in _things: self._log.debug('drop %s [%s]', thing['uid'], _state) - self._prof.prof('drop', uid=thing['uid'], state=_state, - ts=ts) + # self._prof.prof('drop', uid=thing['uid'], state=_state, + # ts=ts) continue output = self._outputs[_state] @@ -1258,9 +1257,9 @@ def advance(self, things, state=None, publish=True, push=False, ts=None, output.put(_things) ts = time.time() - for thing in _things: - self._prof.prof('put', uid=thing['uid'], state=_state, - msg=output.name, ts=ts) + # for thing in _things: + # self._prof.prof('put', uid=thing['uid'], state=_state, + # msg=output.name, ts=ts) # -------------------------------------------------------------------------- diff --git a/src/radical/pilot/utils/prof_utils.py b/src/radical/pilot/utils/prof_utils.py index c575e143b7..52f91ebc4b 100644 --- a/src/radical/pilot/utils/prof_utils.py +++ b/src/radical/pilot/utils/prof_utils.py @@ -55,6 +55,8 @@ # # resources on agent nodes are consumed for all of the pilot's lifetime 'agent' : { + 'total' : [{ru.EVENT: 'bootstrap_0_start'}, + {ru.EVENT: 'bootstrap_0_stop' }] } } From 57c385e8ea98ff58ce1eeb121f1ff44b87c4b0f6 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Tue, 16 Feb 2021 00:26:03 +0100 Subject: [PATCH 006/171] snapshot --- src/radical/pilot/agent/agent_0.py | 20 +- src/radical/pilot/agent/bootstrap_0.sh | 2 - .../pilot/agent/executing/archive/abds.py | 6 +- .../pilot/agent/executing/archive/orte.py | 6 +- src/radical/pilot/agent/executing/popen.py | 15 +- src/radical/pilot/agent/executing/shell.py | 6 +- src/radical/pilot/agent/executing/shell_fs.py | 6 +- src/radical/pilot/agent/executing/sleep.py | 40 ++- src/radical/pilot/agent/scheduler/base.py | 258 +++++++++++------- .../pilot/agent/scheduler/continuous.py | 24 +- .../pilot/tmgr/staging_input/default.py | 3 - src/radical/pilot/utils/component.py | 76 +++--- src/radical/pilot/utils/db_utils.py | 6 +- 13 files changed, 282 insertions(+), 186 deletions(-) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 48d4253f78..7f8d1bed7b 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -9,6 +9,7 @@ import stat import time import pprint +import threading as mt import subprocess as sp import multiprocessing as mp @@ -202,8 +203,10 @@ def initialize(self): timer=self._cfg['db_poll_sleeptime']) # register idle callback to pull for tasks - self.register_timed_cb(self._check_tasks_cb, - timer=self._cfg['db_poll_sleeptime']) + self._ingest = mt.Thread(target=self._ingest) + 
self._ingest.daemon = True + self._ingest.start() + # sub-agents are started, components are started, bridges are up: we are # ready to roll! Update pilot state. @@ -669,6 +672,14 @@ def _check_state(self): return True + # -------------------------------------------------------------------------- + # + def _ingest(self): + + while not self._term.is_set(): + self._check_tasks_cb() + + # -------------------------------------------------------------------------- # def _check_tasks_cb(self): @@ -686,7 +697,8 @@ def _check_tasks_cb(self): 'control' : 'agent_pending'}) if not task_cursor.count(): self._log.info('tasks pulled: 0') - return True + time.sleep(self._cfg['db_poll_sleeptime']) + return # update the tasks to avoid pulling them again next time. task_list = list(task_cursor) @@ -724,8 +736,6 @@ def _check_tasks_cb(self): # since that happened already on the module side when the state was set. self.advance(task_list, publish=False, push=True) - return True - # -------------------------------------------------------------------------- # diff --git a/src/radical/pilot/agent/bootstrap_0.sh b/src/radical/pilot/agent/bootstrap_0.sh index 3bdaa7c920..b7e0238e0b 100755 --- a/src/radical/pilot/agent/bootstrap_0.sh +++ b/src/radical/pilot/agent/bootstrap_0.sh @@ -1905,7 +1905,6 @@ do sleep 3 if kill -0 $AGENT_PID 2>/dev/null then - echo -n '.' if test -e "./killme.signal" then profile_event 'killme' "`date --rfc-3339=ns | cut -c -23`" @@ -1920,7 +1919,6 @@ do kill -9 $AGENT_PID fi else - echo profile_event 'agent_gone' "`date --rfc-3339=ns | cut -c -23`" echo "agent $AGENT_PID is gone" break diff --git a/src/radical/pilot/agent/executing/archive/abds.py b/src/radical/pilot/agent/executing/archive/abds.py index ec04c68790..b2bf4e25eb 100644 --- a/src/radical/pilot/agent/executing/archive/abds.py +++ b/src/radical/pilot/agent/executing/archive/abds.py @@ -212,7 +212,7 @@ def _handle_task(self, task): # Free the Slots, Flee the Flots, Ree the Frots! if task['slots']: - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, [task]) self.advance(task, rps.FAILED, publish=True, push=False) @@ -497,7 +497,7 @@ def _check_running(self): self._tasks_to_watch.remove(task) del(task['proc']) # proc is not json serializable - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, [task]) self.advance(task, rps.CANCELED, publish=True, push=False) else: @@ -516,7 +516,7 @@ def _check_running(self): # Free the Slots, Flee the Flots, Ree the Frots! self._tasks_to_watch.remove(task) del(task['proc']) # proc is not json serializable - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, [task]) if exit_code != 0: # The task failed - fail after staging output diff --git a/src/radical/pilot/agent/executing/archive/orte.py b/src/radical/pilot/agent/executing/archive/orte.py index 8591f3986a..4f4f309a09 100755 --- a/src/radical/pilot/agent/executing/archive/orte.py +++ b/src/radical/pilot/agent/executing/archive/orte.py @@ -220,7 +220,7 @@ def _handle_task(self, task): # Free the Slots, Flee the Flots, Ree the Frots! 
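         # (Note: from this patch series on, the unschedule pubsub carries
         # *lists* of tasks -- `unschedule_cb` in the scheduler base class
         # warns on and wraps non-list payloads -- so even a single task is
         # published as `[task]` below.)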
if task['slots']: - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, [task]) self.advance(task, rps.FAILED, publish=True, push=False) @@ -240,7 +240,7 @@ def task_spawned_cb(self, task, status): # task launch failed self._prof.prof('exec_fail', uid=uid) self._log.error("task %s startup failed: %s", uid, status) - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, [task]) task['target_state'] = rps.FAILED self.advance(task, rps.AGENT_STAGING_OUTPUT_PENDING, @@ -268,7 +268,7 @@ def task_completed_cb(self, task, exit_code): task['exit_code'] = exit_code task['finished'] = timestamp - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, [task]) if exit_code != 0: # task failed - fail after staging output diff --git a/src/radical/pilot/agent/executing/popen.py b/src/radical/pilot/agent/executing/popen.py index 87847da962..bc5d03be70 100644 --- a/src/radical/pilot/agent/executing/popen.py +++ b/src/radical/pilot/agent/executing/popen.py @@ -183,7 +183,7 @@ def _handle_task(self, task): # Free the Slots, Flee the Flots, Ree the Frots! self._prof.prof('unschedule_start', uid=task['uid']) - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, [task]) self.advance(task, rps.FAILED, publish=True, push=False) @@ -397,6 +397,9 @@ def _check_running(self): exit_code = task['proc'].poll() uid = task['uid'] + to_unschedule = list() + to_publish = list() + if exit_code is None: # Process is still running @@ -427,7 +430,7 @@ def _check_running(self): del(task['proc']) # proc is not json serializable self._prof.prof('unschedule_start', uid=task['uid']) - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + to_unschedule.append(task) self.advance(task, rps.CANCELED, publish=True, push=False) # we don't need to watch canceled tasks @@ -450,7 +453,7 @@ def _check_running(self): self._tasks_to_watch.remove(task) del(task['proc']) # proc is not json serializable self._prof.prof('unschedule_start', uid=task['uid']) - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + to_unschedule.append(task) if exit_code != 0: # The task failed - fail after staging output @@ -462,7 +465,11 @@ def _check_running(self): # directives -- at the very least, we'll upload stdout/stderr task['target_state'] = rps.DONE - self.advance(task, rps.AGENT_STAGING_OUTPUT_PENDING, publish=True, push=True) + if to_publish: + self.advance(to_publish, rps.AGENT_STAGING_OUTPUT_PENDING, + publish=True, push=True) + if to_unschedule: + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, to_unschedule) return action diff --git a/src/radical/pilot/agent/executing/shell.py b/src/radical/pilot/agent/executing/shell.py index 40cc74a125..8a48b41744 100644 --- a/src/radical/pilot/agent/executing/shell.py +++ b/src/radical/pilot/agent/executing/shell.py @@ -199,7 +199,7 @@ def _handle_task(self, task): with self._cancel_lock: self._tasks_to_cancel.remove(task['uid']) - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, [task]) self.advance(task, rps.CANCELED, publish=True, push=False) return True @@ -263,7 +263,7 @@ def _handle_task(self, task): # Free the Slots, Flee the Flots, Ree the Frots! 
if task.get('slots'): - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, [task]) self.advance(task, rps.FAILED, publish=True, push=False) @@ -576,7 +576,7 @@ def _handle_event (self, task, pid, state, data) : self._prof.prof('exec_stop', uid=task['uid']) # for final states, we can free the slots. - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, [task]) if data : task['exit_code'] = int(data) else : task['exit_code'] = None diff --git a/src/radical/pilot/agent/executing/shell_fs.py b/src/radical/pilot/agent/executing/shell_fs.py index 41b4d61a72..df6252aaa6 100644 --- a/src/radical/pilot/agent/executing/shell_fs.py +++ b/src/radical/pilot/agent/executing/shell_fs.py @@ -226,7 +226,7 @@ def _handle_task(self, task): with self._cancel_lock: self._to_cancel.remove(task['uid']) - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, [task]) self.advance(task, rps.CANCELED, publish=True, push=False) return True @@ -256,7 +256,7 @@ def _handle_task(self, task): # Free the Slots, Flee the Flots, Ree the Frots! if task.get('slots'): - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, [task]) self.advance(task, rps.FAILED, publish=True, push=False) @@ -466,7 +466,7 @@ def _handle_event (self, msg): del(self._registry[uid]) # free task slots. - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, task) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, [task]) if ret is None: task['exit_code'] = None diff --git a/src/radical/pilot/agent/executing/sleep.py b/src/radical/pilot/agent/executing/sleep.py index c1b63b5119..33d9b1154a 100644 --- a/src/radical/pilot/agent/executing/sleep.py +++ b/src/radical/pilot/agent/executing/sleep.py @@ -44,9 +44,9 @@ def initialize(self): self._terminate = mt.Event() self._tasks_lock = mt.RLock() self._tasks = list() - self._delay = 0.1 + self._delay = 1.0 - self._watcher = mt.Thread(target=self._timed) + self._watcher = mt.Thread(target=self._collect) self._watcher.daemon = True self._watcher.start() @@ -69,10 +69,6 @@ def work(self, tasks): self.advance(tasks, rps.AGENT_EXECUTING, publish=True, push=False) now = time.time() - for t in tasks: - # assert(t['description']['executable'].endswith('sleep')) - t['to_finish'] = now + float(t['description']['arguments'][0]) - for t in tasks: uid = t['uid'] self._prof.prof('exec_start', uid=uid) @@ -81,34 +77,50 @@ def work(self, tasks): self._prof.prof('task_exec_start', uid=uid) self._prof.prof('app_start', uid=uid) + t['to_finish'] = now + float(t['description']['arguments'][0]) + + self._log.debug('=== started new tasks : %d', len(tasks)) + with self._tasks_lock: self._tasks.extend(tasks) # -------------------------------------------------------------------------- # - def _timed(self): + def _collect(self): while not self._terminate.is_set(): with self._tasks_lock: - now = time.time() - to_finish = [t for t in self._tasks if t['to_finish'] <= now] - self._tasks = [t for t in self._tasks if t['to_finish'] > now] - # if not to_finish: - time.sleep(self._delay) - # continue + to_finish = list() + to_continue = list() + now = time.time() + for task in self._tasks: + if task['to_finish'] <= now: to_finish.append(task) + else : to_continue.append(task) + + self._tasks = to_continue + + if not to_finish: + time.sleep(self._delay) + continue + + uids = list() for t in to_finish: uid = t['uid'] + uids.append(uid) t['target_state'] = 'DONE' self._prof.prof('app_stop', uid=uid) 
self._prof.prof('task_exec_stop', uid=uid) self._prof.prof('task_stop', uid=uid) self._prof.prof('exec_stop', uid=uid) self._prof.prof('unschedule_start', uid=uid) - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, t) + + self._log.debug('=== collected : %d', len(to_finish)) + + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, to_finish) self.advance(to_finish, rps.AGENT_STAGING_OUTPUT_PENDING, publish=True, push=True) diff --git a/src/radical/pilot/agent/scheduler/base.py b/src/radical/pilot/agent/scheduler/base.py index ea2546f48e..e1f69b6931 100644 --- a/src/radical/pilot/agent/scheduler/base.py +++ b/src/radical/pilot/agent/scheduler/base.py @@ -36,6 +36,9 @@ # SCHEDULER_NAME_SCATTERED = "SCATTERED" +CHUNKSIZE = 1024 # break sort of incoming tasks not and then to + # actually schedule them + # ------------------------------------------------------------------------------ # # An RP agent scheduler will place incoming tasks onto a set of cores and gpus. @@ -454,7 +457,7 @@ def slot_status(self, msg=None): ret += glyphs[gpu] ret += '|' - self._log.debug("status: %-30s: %s", msg, ret) + # self._log.debug("==== status: %-30s: %s", msg, ret) return ret @@ -534,7 +537,12 @@ def unschedule_cb(self, topic, msg): release (for whatever reason) all slots allocated to this task ''' + if not isinstance(msg, list): + self._log.error('msg is not list: %s', msg) + msg = [msg] + self._queue_unsched.put(msg) + # self._log.debug('=== unscheduled from queue : %d', len(msg)) # return True to keep the cb registered return True @@ -596,24 +604,22 @@ def _schedule_tasks(self): resources = True # fresh start, all is free while not self._proc_term.is_set(): - # self._log.debug('=== schedule tasks 0: %s, w: %d', resources, - # len(self._waitpool)) + # idle if this iteration changes no state + old_state = [resources, len(self._waitpool)] - active = 0 # see if we do anything in this iteration + # self._log.debug('==== schedule tasks 0: RX %s [%d]', resources, len(self._waitpool)) # if we have new resources, try to place waiting tasks. r_wait = False if resources: - r_wait, a = self._schedule_waitpool() - active += int(a) - # self._log.debug('=== schedule tasks w: %s %s', r_wait, a) + r_wait = self._schedule_waitpool() + # self._log.debug('==== scheduled tasks w: RX %s %s', resources, r_wait) # always try to schedule newly incoming tasks # running out of resources for incoming could still mean we have # smaller slots for waiting tasks, so ignore `r` for now. 
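            # (The schedule helpers effectively return a tri-state: True
            # for 'new resources became available', False for 'ran out of
            # usable resources', and None for 'no change' -- only a
            # definite False from either path may flip `resources` here.)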
- r_inc, a = self._schedule_incoming() - active += int(a) - # self._log.debug('=== schedule tasks i: %s %s', r_inc, a) + r_inc = self._schedule_incoming() + # self._log.debug('==== scheduled tasks i: RX %s %s', resources, r_inc) # if we had resources, but could not schedule any incoming not any # waiting, then we effectively ran out of *useful* resources @@ -624,17 +630,14 @@ def _schedule_tasks(self): # if tasks got unscheduled (and not replaced), then we have new # space to schedule waiting tasks (unless we have resources from # before) - r, a = self._unschedule_completed() + r = self._unschedule_completed() if not resources and r: resources = True - active += int(a) - # self._log.debug('=== schedule tasks c: %s %s', r, a) - - if not active: - time.sleep(0.01) # FIXME: configurable - - # self._log.debug('=== schedule tasks x: %s %s', resources, active) + # self._log.debug('==== scheduled tasks c: RX %s %s', resources, r) + # idle if this iteration changes no state + if old_state != [resources, len(self._waitpool)]: + time.sleep(0.1) # -------------------------------------------------------------------------- # @@ -648,7 +651,9 @@ def _prof_sched_skip(self, task): # def _schedule_waitpool(self): - # self.slot_status("before schedule waitpool") + # self._log.debug("==== schedule waitpool %d waiting" % len(self._waitpool)) + + resources = None # default: no change to resource state # sort by inverse tuple size to place larger tasks first and backfill # with smaller tasks. We only look at cores right now - this needs @@ -656,115 +661,149 @@ def _schedule_waitpool(self): # We define `tuple_size` as # `(cpu_processes + gpu_processes) * cpu_threads` # + # FIXME: cache tuple size metric + # FIXME: only resort waitpool if we need to tasks = list(self._waitpool.values()) tasks.sort(key=lambda x: (x['tuple_size'][0] + x['tuple_size'][2]) * x['tuple_size'][1], reverse=True) + # self._log.debug("==== schedule waitpool %d", len(tasks)) # cycle through waitpool, and see if we get anything placed now. 
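        # (`ru.lazy_bisect` splits `tasks` into those for which `check`
        # succeeded (scheduled) and the remainder (unscheduled); with the
        # list sorted by decreasing size it can avoid calling `check` on
        # every task individually, and tasks skipped that way are reported
        # via `on_skip`.)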
scheduled, unscheduled = ru.lazy_bisect(tasks, check=self._try_allocation, on_skip=self._prof_sched_skip, log=self._log) + # self._log.debug("==== schedules waitpool %d", len(scheduled)) # for task in scheduled: # self._prof.prof('schedule_wait', uid=task['uid']) - self._waitpool = {task['uid']:task for task in unscheduled} + # we only need to re-create the waitpool if any tasks were scheduled + if scheduled: + self._waitpool = {task['uid']:task for task in unscheduled} - # update task resources - for task in scheduled: - td = task['description'] - task['$set'] = ['resources'] - task['resources'] = {'cpu': td['cpu_processes'] * - td.get('cpu_threads', 1), - 'gpu': td['gpu_processes']} - self.advance(scheduled, rps.AGENT_EXECUTING_PENDING, publish=True, - push=True) + # if unscheduled tasks remain in the waitpool, we ran out of resources + if unscheduled and self._waitpool: + if self._waitpool: + resources = False - # method counts as `active` if anything was scheduled - active = bool(scheduled) + if scheduled: + self._log.debug('=== scheduled from waitpool: %d', len(scheduled)) - # if we sccheduled some tasks but not all, we ran out of resources - resources = not (bool(unscheduled) and bool(unscheduled)) + # update task resources + for task in scheduled: + td = task['description'] + task['$set'] = ['resources'] + task['resources'] = {'cpu': td['cpu_processes'] * + td.get('cpu_threads', 1), + 'gpu': td['gpu_processes']} + self.advance(scheduled, rps.AGENT_EXECUTING_PENDING, publish=True, + push=True) - # self.slot_status("after schedule waitpool") - return resources, active + # self._log.debug("==== after schedule waitpool %s: %d waiting", + # resources, len(self._waitpool)) + return resources # -------------------------------------------------------------------------- # def _schedule_incoming(self): - # fetch all tasks from the queue - tasks = list() - try: + # self._log.debug("==== before schedule incoming: waiting: %d", + # len(self._waitpool)) - while not self._proc_term.is_set(): - data = self._queue_sched.get(timeout=0.001) + resources = None # n o change in resource status - if not isinstance(data, list): - data = [data] + # we try to schedule incoming tasks as long as they ar incoming. The + # tasks do also get sorted by size (just like we do for the waitpool), + # but that happens only in chunks of `CHUNKSIZE` + while True: - for task in data: - self._set_tuple_size(task) - tasks.append(task) + # fetch all tasks from the queue + tasks = list() + try: - except queue.Empty: - # no more unschedule requests - pass + while not self._proc_term.is_set(): + data = self._queue_sched.get(timeout=0.001) - if not tasks: - # no resource change, no activity - return None, False + for task in data: + self._set_tuple_size(task) + tasks.append(task) - # self.slot_status("before schedule incoming [%d]" % len(tasks)) + if len(tasks) > CHUNKSIZE: + # stop collecting, schedule what we have, only continue + # here when all tasks can be scheduled + # self._log.debug('==== break for chunk %d', CHUNKSIZE) + break - # handle largest tasks first - # FIXME: this needs lazy-bisect - to_wait = list() - for task in sorted(tasks, key=lambda x: x['tuple_size'][0], - reverse=True): + except queue.Empty: + # no more unschedule requests + pass - # either we can place the task straight away, or we have to - # put it in the wait pool. 
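            # (`_try_allocation` asks the concrete scheduler for a slot
            # assignment and, on success, attaches the slots to the task
            # and returns a truthy value; a False return means the task
            # does not fit the resources that are currently free.)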
- if self._try_allocation(task): + if not tasks: + # self._log.debug('==== return for empty queue') + # no resource change, no activity + return None - # task got scheduled - advance state, notify world about the - # state change, and push it out toward the next component. - # self._prof.prof('schedule_first', uid=task['uid']) - td = task['description'] - task['$set'] = ['resources'] - task['resources'] = {'cpu': td['cpu_processes'] * - td.get('cpu_threads', 1), - 'gpu': td['gpu_processes']} - self.advance(task, rps.AGENT_EXECUTING_PENDING, - publish=True, push=True) + self._log.debug("==== schedule incoming [%d]", len(tasks)) - else: - to_wait.append(task) + # handle largest tasks first + # FIXME: this needs lazy-bisect - # all tasks which could not be scheduled are added to the waitpool - self._waitpool.update({task['uid']:task for task in to_wait}) + tasks.sort(key=lambda x: x['tuple_size'][0], reverse=True) + scheduled, unscheduled = ru.lazy_bisect(tasks, + check=self._try_allocation, + on_skip=self._prof_sched_skip, + log=self._log) + if scheduled: + for task in scheduled: + + # task got scheduled - advance state, notify world about the + # state change, and push it out toward the next component. + self._prof.prof('schedule_first', uid=task['uid']) + td = task['description'] + task['$set'] = ['resources'] + task['resources'] = {'cpu': td['cpu_processes'] * + td.get('cpu_threads', 1), + 'gpu': td['gpu_processes']} + + self.advance(scheduled, rps.AGENT_EXECUTING_PENDING, + publish=True, push=True) + + # all tasks which could not be scheduled are added to the waitpool + if unscheduled: + self._waitpool.update({task['uid']:task for task in unscheduled}) + + # incoming tasks which have to wait are the only reason to + # rebuild the tuple_size map. + self._ts_valid = False + + # if tasks remain waiting, we are out of usable resources + resources = False - # we performed some activity (worked on tasks) - active = True + self._log.debug('=== unscheduled incoming: %d', len(scheduled)) + self._log.debug('=== scheduled incoming: %d', len(unscheduled)) - # if tasks remain waiting, we are out of usable resources - resources = not bool(to_wait) + # if we could not schedule any task from the last chunk, then we + # should break to allow the unschedule to kick in + # NOTE: new incoming tasks *may* have a chance to get scheduled, so + # this is a lucky guess + if unscheduled: + break - # incoming tasks which have to wait are the only reason to rebuild the - # tuple_size map - self._ts_valid = False + self._log.debug("==== after schedule incoming: waiting: %d", + len(self._waitpool)) - # self.slot_status("after schedule incoming") - return resources, active + return resources # -------------------------------------------------------------------------- # def _unschedule_completed(self): + # self._log.debug("==== unschedule completed") + to_unschedule = list() try: @@ -781,23 +820,28 @@ def _unschedule_completed(self): # in a max added latency of about 0.1 second, which is one order of # magnitude above our noise level again and thus acceptable (tm). 
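        # (NB: with `CHUNKSIZE` now at 1024 the worst-case added latency
        # is closer to one second (1024 * 0.001s), so the 0.1s figure
        # above only holds for the original batch size of 128.  The
        # bounded-drain pattern in isolation:
        #
        #     batch = list()
        #     try:
        #         while True:
        #             batch.extend(q.get(timeout=0.001))
        #             if len(batch) > CHUNKSIZE:
        #                 break
        #     except queue.Empty:
        #         pass
        # )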
while not self._proc_term.is_set(): - task = self._queue_unsched.get(timeout=0.001) - to_unschedule.append(task) - if len(to_unschedule) > 128: + tasks = self._queue_unsched.get(timeout=0.001) + to_unschedule.extend(tasks) + # self._log.debug('=== unscheduled to batch : %d', len(tasks)) + if len(to_unschedule) > CHUNKSIZE: break except queue.Empty: # no more unschedule requests pass - to_release = list() # slots of unscheduling tasks - placed = list() # uids of waiting tasks replacing unscheduled ones - + # self._log.debug("==== unschedule completed %d", len(to_unschedule)) if to_unschedule: # rebuild the tuple_size binning, maybe + self._log.debug('=== unscheduled refresh : %d', len(to_unschedule)) self._refresh_ts_map() + self._log.debug('=== unscheduled refreshed : %d', len(to_unschedule)) + + to_release = list() # unscheduling tasks to release slots from + placed = list() # uids of tasks replacing unscheduled ones + to_advance = list() # tasks to move to the scheduler for task in to_unschedule: # if we find a waiting task with the same tuple size, we don't free @@ -847,35 +891,37 @@ def _unschedule_completed(self): # timestamp ts = time.time() self._prof.prof('unschedule_stop', uid=task['uid'], ts=ts) - # self._prof.prof('schedule_fast', uid=replace['uid'], ts=ts) + self._prof.prof('schedule_fast', uid=replace['uid'], ts=ts) self._prof.prof('schedule_ok', uid=replace['uid'], ts=ts) - self.advance(replace, rps.AGENT_EXECUTING_PENDING, - publish=True, push=True) + to_advance.append(replace) + + if to_advance: + self._log.debug('=== unscheduled advance : %d', len(to_advance)) + self.advance(to_advance, rps.AGENT_EXECUTING_PENDING, + publish=True, push=True) # we have tasks to unschedule, which will free some resources. We can # thus try to schedule larger tasks again, and also inform the caller # about resource availability. - for task in to_release: - self.unschedule_task(task) + # self._log.debug("==== release completed %d", len(to_release)) + if to_release: + for task in to_release: + self.unschedule_task(task) + self._prof.prof('unschedule_stop', uid=task['uid']) + + self._log.debug('=== unscheduled release : %d', len(to_release)) # if previously waiting tasks were placed, remove them from the waitpool + # self._log.debug("==== scheduled completed %d", len(placed)) if placed: for uid in placed: del(self._waitpool[uid]) - if to_release: - # new resources, activity - return True, True - elif to_unschedule: - # no new resources, but activity - return False, True - else: - # no new resources, no activity - return False, False + # self._log.debug("=== unscheduled and replaced : %d / %d", len(to_unschedule), len(placed)) - - # we have new resources, and were active + if to_release: return True # new resources + else : return False # -------------------------------------------------------------------------- @@ -917,6 +963,8 @@ def _try_allocation(self, task): # def _handle_cuda(self, task): + return + # Check if task requires GPUs. If so, set CUDA_VISIBLE_DEVICES to the # list of assigned GPU IDs. We only handle uniform GPU setting for # now, and will isse a warning on non-uniform ones. diff --git a/src/radical/pilot/agent/scheduler/continuous.py b/src/radical/pilot/agent/scheduler/continuous.py index e5897b9995..85c9a8cfa2 100644 --- a/src/radical/pilot/agent/scheduler/continuous.py +++ b/src/radical/pilot/agent/scheduler/continuous.py @@ -2,6 +2,8 @@ __copyright__ = "Copyright 2013-2016, http://radical.rutgers.edu" __license__ = "MIT" +import pprint + import math as m from ... 
import constants as rpc @@ -219,6 +221,8 @@ def _find_resources(self, node, find_slots, cores_per_slot, gpus_per_slot, thread count and using physical core IDs for process placement? ''' + # self._log.debug('=== find on %s: %s * [%s, %s]', node['uid'], ) + # check if the node can host the request free_cores = node['cores'].count(rpc.FREE) free_gpus = node['gpus'].count(rpc.FREE) @@ -231,13 +235,13 @@ def _find_resources(self, node, find_slots, cores_per_slot, gpus_per_slot, alc_slots = int(m.floor(free_cores / cores_per_slot)) if gpus_per_slot: - alc_slots = min(alc_slots, int(m.floor(free_gpus / gpus_per_slot ))) + alc_slots = min(alc_slots, int(m.floor(free_gpus / gpus_per_slot))) if lfs_per_slot: - alc_slots = min(alc_slots, int(m.floor(free_lfs / lfs_per_slot ))) + alc_slots = min(alc_slots, int(m.floor(free_lfs / lfs_per_slot))) if mem_per_slot: - alc_slots = min(alc_slots, int(m.floor(free_mem / mem_per_slot ))) + alc_slots = min(alc_slots, int(m.floor(free_mem / mem_per_slot))) # is this enough? if not alc_slots: @@ -359,22 +363,30 @@ def schedule_task(self, task): assert(mem_per_slot <= mem_per_node), 'too much mem per proc %s' % mem_per_slot # check what resource type limits teh number of slots per node + t = list() slots_per_node = int(m.floor(cores_per_node / cores_per_slot)) + t.append([cores_per_node, cores_per_slot, slots_per_node]) if gpus_per_slot: slots_per_node = min(slots_per_node, - int(m.floor(gpus_per_node / gpus_per_slot ))) + int(m.floor(gpus_per_node / gpus_per_slot))) + t.append([gpus_per_node, gpus_per_slot, slots_per_node]) if lfs_per_slot: slots_per_node = min(slots_per_node, int(m.floor(lfs_per_node / lfs_per_slot))) + t.append([lfs_per_node, lfs_per_slot, slots_per_node]) if mem_per_slot: slots_per_node = min(slots_per_node, int(m.floor(mem_per_node / mem_per_slot))) + t.append([mem_per_node, mem_per_slot, slots_per_node]) if not mpi and req_slots > slots_per_node: - raise ValueError('non-mpi task does not fit on a single node') + raise ValueError('non-mpi task does not fit on a single node:' + '%s * %s:%s > %s:%s -- %s > %s [%s %s] %s' % (req_slots, cores_per_slot, + gpus_per_slot, cores_per_node, gpus_per_node, req_slots, + slots_per_node, cores_per_slot, gpus_per_slot, t)) # set conditions to find the first matching node is_first = True @@ -410,7 +422,7 @@ def schedule_task(self, task): for node in self._iterate_nodes(): node_uid = node['uid'] - # node_name = node['name'] + node_name = node['name'] # self._log.debug('next %s : %s', node_uid, node_name) # self._log.debug('req1: %s = %s + %s', req_slots, rem_slots, diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py index 1a358db597..d563206954 100644 --- a/src/radical/pilot/tmgr/staging_input/default.py +++ b/src/radical/pilot/tmgr/staging_input/default.py @@ -110,9 +110,6 @@ def _base_command_cb(self, topic, msg): # def work(self, tasks): - if not isinstance(tasks, list): - tasks = [tasks] - self.advance(tasks, rps.TMGR_STAGING_INPUT, publish=True, push=False) # we first filter out any tasks which don't need any input staging, and diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index b668af1811..2e56d4bbf4 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -19,6 +19,9 @@ def out(msg): sys.stdout.flush() +CHUNKSIZE = 1024 + + # ------------------------------------------------------------------------------ # class ComponentManager(object): @@ -1049,6 +1052,8 @@ def 
work_cb(self): for name in self._inputs: + # self._log.debug('== work_cb %s', name) + input = self._inputs[name]['queue'] states = self._inputs[name]['states'] @@ -1060,14 +1065,15 @@ def work_cb(self): if not things: return True + self._log.debug('== work_cb %s got %d ', name, len(things)) + # the worker target depends on the state of things, so we # need to sort the things into buckets by state before # pushing them buckets = dict() - for thing in ru.as_list(things): + for thing in things: state = thing.get('state') # can be stateless uid = thing.get('uid') # and not have uids - # self._prof.prof('get', uid=uid, state=state) if state not in buckets: buckets[state] = list() @@ -1081,26 +1087,30 @@ def work_cb(self): assert(state in self._workers), 'no worker for state %s' % state try: - to_cancel = list() - - for thing in things: - - uid = thing.get('uid') + # filter out canceled things + if self._cancel_list: # FIXME: this can become expensive over time # if the cancel list is never cleaned - if uid and uid in self._cancel_list: - with self._cancel_lock: - self._cancel_list.remove(uid) - to_cancel.append(thing) - - self._log.debug('got %s (%s)', uid, state) - - if to_cancel: - # only advance stateful entities, otherwise just drop - if state: - self.advance(to_cancel, rps.CANCELED, publish=True, - push=False) + to_cancel = list() + with self._cancel_lock: + if thing['uid'] in self._cancel_list: + to_cancel.append(thing) + + self._cancel_list = [x for x in self._cancel_list + if x not in to_cancel] + + if to_cancel: + # only advance stateful entities, otherwise just drop + if state: + self.advance(to_cancel, rps.CANCELED, + publish=True, push=False) + + + # self._log.debug('== got %d things (%s)', len(things), state) + # for thing in things: + # self._log.debug('got %s (%s)', thing['uid'], state) + with self._work_lock: self._workers[state](things) @@ -1152,7 +1162,7 @@ def advance(self, things, state=None, publish=True, push=False, ts=None, things = ru.as_list(things) - self._log.debug('advance bulk: %s [%s, %s]', len(things), push, publish) + # self._log.debug('advance bulk: %s [%s, %s]', len(things), push, publish) # assign state, sort things by state buckets = dict() @@ -1228,32 +1238,32 @@ def advance(self, things, state=None, publish=True, push=False, ts=None, # ts = time.time() if _state in rps.FINAL: # things in final state are dropped - for thing in _things: - self._log.debug('final %s [%s]', thing['uid'], _state) - self._prof.prof('drop', uid=thing['uid'], state=_state, - ts=ts) + # for thing in _things: + # self._log.debug('final %s [%s]', thing['uid'], _state) + # self._prof.prof('drop', uid=thing['uid'], state=_state, + # ts=ts) continue if _state not in self._outputs: # unknown target state -- error - for thing in _things: - self._log.debug("lost %s [%s]", thing['uid'], _state) - # self._prof.prof('lost', uid=thing['uid'], state=_state, - # ts=ts) + # for thing in _things: + # # self._log.debug("lost %s [%s]", thing['uid'], _state) + # # self._prof.prof('lost', uid=thing['uid'], state=_state, + # # ts=ts) continue if not self._outputs[_state]: # empty output -- drop thing - for thing in _things: - self._log.debug('drop %s [%s]', thing['uid'], _state) - # self._prof.prof('drop', uid=thing['uid'], state=_state, - # ts=ts) + # for thing in _things: + # self._log.debug('drop %s [%s]', thing['uid'], _state) + # # self._prof.prof('drop', uid=thing['uid'], state=_state, + # # ts=ts) continue output = self._outputs[_state] # push the thing down the drain - self._log.debug('put bulk 
%s: %s', _state, len(_things)) + # self._log.debug('put bulk %s: %s', _state, len(_things)) output.put(_things) ts = time.time() diff --git a/src/radical/pilot/utils/db_utils.py b/src/radical/pilot/utils/db_utils.py index 22cb911cc1..31299fd108 100644 --- a/src/radical/pilot/utils/db_utils.py +++ b/src/radical/pilot/utils/db_utils.py @@ -71,7 +71,7 @@ def get_session_docs(db, sid, cache=None, cachedir=None): try: if os.path.isfile (cache): # print 'using cache: %s' % cache - return ru.read_json (cache) + return ru.read_json(cache, filter_comments=False) except Exception as e: # continue w/o cache sys.stderr.write("cannot read session cache at %s (%s)\n" % (cache, e)) @@ -110,7 +110,9 @@ def get_session_docs(db, sid, cache=None, cachedir=None): # to the cache try: os.system ('mkdir -p %s' % cachedir) - ru.write_json (json_data, "%s/%s.json" % (cachedir, sid)) + with open("%s/%s.json" % (cachedir, sid)) as fout: + fout.write(ru.as_bytes(json_data)) + except Exception: # we can live without cache, no problem... pass From 2d2b11a675aee56d554d680657e6895235500f59 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Tue, 16 Feb 2021 15:25:07 +0100 Subject: [PATCH 007/171] fix task advance in popen --- src/radical/pilot/agent/executing/popen.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/radical/pilot/agent/executing/popen.py b/src/radical/pilot/agent/executing/popen.py index bc5d03be70..5cb6363edb 100644 --- a/src/radical/pilot/agent/executing/popen.py +++ b/src/radical/pilot/agent/executing/popen.py @@ -397,8 +397,8 @@ def _check_running(self): exit_code = task['proc'].poll() uid = task['uid'] - to_unschedule = list() - to_publish = list() + to_advance = list() + to_cancel = list() if exit_code is None: # Process is still running @@ -430,8 +430,7 @@ def _check_running(self): del(task['proc']) # proc is not json serializable self._prof.prof('unschedule_start', uid=task['uid']) - to_unschedule.append(task) - self.advance(task, rps.CANCELED, publish=True, push=False) + to_cancel.append(task) # we don't need to watch canceled tasks self._tasks_to_watch.remove(task) @@ -453,7 +452,7 @@ def _check_running(self): self._tasks_to_watch.remove(task) del(task['proc']) # proc is not json serializable self._prof.prof('unschedule_start', uid=task['uid']) - to_unschedule.append(task) + to_advance.append(task) if exit_code != 0: # The task failed - fail after staging output @@ -465,11 +464,13 @@ def _check_running(self): # directives -- at the very least, we'll upload stdout/stderr task['target_state'] = rps.DONE - if to_publish: - self.advance(to_publish, rps.AGENT_STAGING_OUTPUT_PENDING, + if to_cancel: + self.advance(to_cancel, rps.CANCELED, + publish=True, push=False) + if to_advance: + self.advance(to_advance, rps.AGENT_STAGING_OUTPUT_PENDING, publish=True, push=True) - if to_unschedule: - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, to_unschedule) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, to_cancel + to_advance) return action From 883b30876c828bd5059ad20e3271fa15d7875298 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 17 Feb 2021 22:49:40 +0100 Subject: [PATCH 008/171] snapshot --- bin/radical-pilot-bridge | 8 +- examples/00_getting_started.py | 26 ++- examples/01_task_details.py | 9 +- src/radical/pilot/agent/agent_0.py | 56 ++++- src/radical/pilot/agent/executing/sleep.py | 4 +- src/radical/pilot/agent/launch_method/flux.py | 7 +- src/radical/pilot/agent/scheduler/base.py | 56 ++--- .../pilot/agent/scheduler/continuous.py | 2 +- 
src/radical/pilot/configs/pmgr_default.json | 5 + src/radical/pilot/configs/tmgr_default.json | 2 +- src/radical/pilot/pmgr/launching/default.py | 219 +++++------------- src/radical/pilot/raptor/master.py | 10 +- src/radical/pilot/raptor/worker.py | 24 +- src/radical/pilot/task_description.py | 2 +- src/radical/pilot/task_manager.py | 5 +- .../pilot/tmgr/staging_input/default.py | 107 +++++---- .../pilot/tmgr/staging_output/default.py | 37 ++- src/radical/pilot/utils/component.py | 2 +- 18 files changed, 290 insertions(+), 291 deletions(-) diff --git a/bin/radical-pilot-bridge b/bin/radical-pilot-bridge index d3d4d6f083..e385f249a5 100755 --- a/bin/radical-pilot-bridge +++ b/bin/radical-pilot-bridge @@ -102,12 +102,6 @@ def wrapped_main(cfg, log, prof): # create the bridge, store connection addresses in FS, and begin to work bridge = ru.zmq.Bridge.create(cfg) - - ru.write_json('%s/%s.cfg' % (cfg.path, cfg.uid), - {'uid' : cfg.uid, - bridge.type_in : str(bridge.addr_in), - bridge.type_out: str(bridge.addr_out)}) - bridge.start() # bridge runs - send heartbeats so that cmgr knows about it @@ -133,7 +127,7 @@ def wrapped_main(cfg, log, prof): # register cmgr heartbeat by beating once hb.beat(uid=cfg.cmgr) - # record cmgr heartbeats + # react on cmgr heartbeats def hb_sub_cb(topic, msg): if msg['uid'] == cfg.cmgr: hb.beat(uid=cfg.cmgr) diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index c6dca8d7f8..edbb33874b 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -5,6 +5,7 @@ import os import sys +import random import radical.pilot as rp import radical.utils as ru @@ -40,7 +41,8 @@ try: # read the config used for resource details - config = ru.read_json('%s/config.json' % os.path.dirname(os.path.abspath(__file__))) + config = ru.read_json('%s/config.json' + % os.path.dirname(os.path.abspath(__file__))) pmgr = rp.PilotManager(session=session) tmgr = rp.TaskManager(session=session) @@ -51,20 +53,19 @@ # Define an [n]-core local pilot that runs for [x] minutes # Here we use a dict to initialize the description object pd_init = {'resource' : resource, - 'runtime' : 30, # pilot runtime (min) + 'runtime' : 120, # pilot runtime (min) 'exit_on_error' : True, 'project' : config[resource].get('project', None), 'queue' : config[resource].get('queue', None), 'access_schema' : config[resource].get('schema', None), - 'cores' : config[resource].get('cores', 1), - 'gpus' : config[resource].get('gpus', 0), + 'nodes' : 1024 * 4, } pdesc = rp.PilotDescription(pd_init) # Launch the pilot. pilot = pmgr.submit_pilots(pdesc) - n = 1024 # number of tasks to run + n = 1024 * 1024 # number of tasks to run report.header('submit %d tasks' % n) # Register the pilot in a TaskManager object. @@ -78,11 +79,16 @@ for i in range(0, n): # create a new task description, and fill it. - # Here we don't use dict initialization. 
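The rewritten example below switches from attribute assignment to dict initialization; both forms appear in this series and produce the same description:

    import radical.pilot as rp

    # attribute style, as used before this patch
    td = rp.TaskDescription()
    td.executable    = '/bin/date'
    td.cpu_processes = 1

    # dict style, as used after this patch - equivalent result
    td = rp.TaskDescription({'executable'   : '/bin/date',
                             'cpu_processes': 1})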
- td = rp.TaskDescription() - td.executable = '/bin/date' - td.cpu_processes = 1 - tds.append(td) + tds.append(rp.TaskDescription({ + + 'sandbox' : 'task_sandbox', + 'executable' : '%s/examples/hello_rp.sh' % os.getcwd(), + 'arguments' : [random.randint(1,10) + 10], + 'cpu_process_type': rp.MPI, + 'cpu_processes' : random.choice([1, 2, 4, 8, 16]), + 'cpu_threads' : random.randint(1,2), + 'gpu_processes' : random.choice([0,0,0,0,0,0,1,2]), + })) report.progress() report.progress_done() diff --git a/examples/01_task_details.py b/examples/01_task_details.py index 9f05471707..5c10a1ba8b 100755 --- a/examples/01_task_details.py +++ b/examples/01_task_details.py @@ -104,7 +104,7 @@ report.info('\n') for task in tasks: - report.plain(' * %s: %s, exit: %3s, out: %s\n' + report.plain(' * %s: %s, exit: %3s, out: %s' % (task.uid, task.state[:4], task.exit_code, task.stdout[:35])) @@ -115,13 +115,6 @@ report.plain("exit code : %s\n" % task_dict['exit_code']) report.plain("stdout : %s\n" % task_dict['stdout']) - # get some more details for one task: - task_dict = tasks[1].as_dict() - report.plain("task workdir : %s\n" % task_dict['task_sandbox']) - report.plain("pilot id : %s\n" % task_dict['pilot']) - report.plain("exit code : %s\n" % task_dict['exit_code']) - report.plain("exit stdout : %s\n" % task_dict['stdout']) - except Exception as e: # Something unexpected happened in the pilot code above diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 7f8d1bed7b..f388ed2cc7 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -55,6 +55,8 @@ def __init__(self, cfg, session): self._starttime = time.time() self._final_cause = None + rpu.Worker.__init__(self, self._cfg, session) + # this is the earliest point to sync bootstrap and agent profiles prof = ru.Profiler(ns='radical.pilot', name='agent.0') prof.prof('hostname', uid=cfg.pid, msg=ru.get_hostname()) @@ -62,6 +64,9 @@ def __init__(self, cfg, session): # connect to MongoDB for state push/pull self._connect_db() + # connect to client communication channels, maybe + self._connect_communication() + # configure ResourceManager before component startup, as components need # ResourceManager information for function (scheduler, executor) self._configure_rm() @@ -84,8 +89,6 @@ def __init__(self, cfg, session): # at this point the session is up and connected, and it should have # brought up all communication bridges and components. We are # ready to rumble! - rpu.Worker.__init__(self, self._cfg, session) - self.register_subscriber(rpc.CONTROL_PUBSUB, self._check_control) # run our own slow-paced heartbeat monitor to watch pmgr heartbeats @@ -141,6 +144,53 @@ def _connect_db(self): self._dbs = DBSession(sid=self._cfg.sid, dburl=self._cfg.dburl, cfg=self._cfg, log=self._log) + + # -------------------------------------------------------------------------- + # + def _connect_communication(self): + + # when running on the same host as the client, we may be able to bypass + # MongoDB and instead connect to the client's ZMQ communication + # channels. 
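The client-bypass wiring in `_connect_communication` below reduces to one Getter, one Putter, and an announcement message. A condensed sketch using the `ru.zmq` signatures from this patch; the helper name is hypothetical, and the `cfg` layout mirrors the `pilot_comm` section the pmgr writes further down:

    import radical.utils as ru

    def connect_client(cfg, on_task):
        '''
        Wire the agent to the client side ZMQ queues: pull tasks from the
        `input` queue, push results to the `output` queue.  `cfg` is
        assumed to hold {'channel': ..., 'put': ..., 'get': ...} per queue.
        '''
        getter = ru.zmq.Getter(cfg['input']['channel'],
                               cfg['input']['get'], on_task)
        putter = ru.zmq.Putter(cfg['output']['channel'],
                               cfg['output']['put'])
        return getter, putter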
+       #
+       cfg = self._cfg.pilot_comm
+
+       ru.write_json('pmgr_comm_pubsub.cfg', cfg.comm)
+       self.register_subscriber('pmgr_comm_pubsub', self._pilot_comm_cb)
+       self.register_publisher('pmgr_comm_pubsub')
+
+       self._client_input = ru.zmq.Getter(cfg.input['channel'],
+                                          cfg.input['get'],
+                                          self._client_input_cb)
+
+       self._client_output = ru.zmq.Putter(cfg.output['channel'],
+                                           cfg.output['put'])
+
+       # allow the comm pubsub to connect
+       time.sleep(1)
+
+       # how do we verify that the comm channel is up?
+       self.publish('pmgr_comm_pubsub', msg={'cmd': 'pilot_connect',
+                                             'arg': {'pid'   : self._pid,
+                                                     'input' : cfg.input,
+                                                     'output': cfg.output}})
+
+
+   # --------------------------------------------------------------------------
+   #
+   def _client_input_cb(self, msg):
+
+       self._log.debug('=== input cb: %s', msg)
+       self._client_output.put(msg)
+
+
+   # --------------------------------------------------------------------------
+   #
+   def _pilot_comm_cb(self, topic, msg):
+
+       self._log.debug('=== ctl sub cb: %s %s', topic, msg)
+

    # --------------------------------------------------------------------------
    #
    def _configure_rm(self):
@@ -565,7 +615,7 @@ def _check_rpc(self):
            # document has no rpc request
            return True

-       self._log.debug('=== rpc req: %s', rpc_req)
+       self._log.debug('rpc req: %s', rpc_req)

        # RPCs are synchronous right now - we send the RPC on the command
        # channel, hope that some component picks it up and replies, and then
diff --git a/src/radical/pilot/agent/executing/sleep.py b/src/radical/pilot/agent/executing/sleep.py
index 33d9b1154a..5974fc9c0e 100644
--- a/src/radical/pilot/agent/executing/sleep.py
+++ b/src/radical/pilot/agent/executing/sleep.py
@@ -79,7 +79,7 @@ def work(self, tasks):

            t['to_finish'] = now + float(t['description']['arguments'][0])

-       self._log.debug('=== started new tasks : %d', len(tasks))
+       self._log.debug('started new tasks : %d', len(tasks))

        with self._tasks_lock:
            self._tasks.extend(tasks)
@@ -118,7 +118,7 @@ def _collect(self):
            self._prof.prof('exec_stop', uid=uid)
            self._prof.prof('unschedule_start', uid=uid)

-       self._log.debug('=== collected : %d', len(to_finish))
+       self._log.debug('collected : %d', len(to_finish))

        self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, to_finish)

diff --git a/src/radical/pilot/agent/launch_method/flux.py b/src/radical/pilot/agent/launch_method/flux.py
index 799c5766a7..a2aa0797ea 100644
--- a/src/radical/pilot/agent/launch_method/flux.py
+++ b/src/radical/pilot/agent/launch_method/flux.py
@@ -55,7 +55,6 @@ def rm_config_hook(cls, name, cfg, rm, logger, profiler):
        while True:

            line = ru.as_string(proc.stdout.readline().strip())
-           logger.debug('=== %s', line)

            if line.startswith('export '):
                k, v = line.split(' ', 1)[1].strip().split('=', 1)
@@ -79,7 +78,7 @@ def rm_config_hook(cls, name, cfg, rm, logger, profiler):
        # ----------------------------------------------------------------------
        def _watch_flux(flux_env):

-           logger.info('=== starting flux watcher')
+           logger.info('starting flux watcher')

            for k,v in flux_env.items():
                os.environ[k] = v
@@ -88,10 +87,10 @@ def _watch_flux(flux_env):
            while not ret:

                out, err, ret = ru.sh_callout('flux ping -c 1 all')
-               logger.debug('=== flux watcher out: %s', out)
+               logger.debug('flux watcher out: %s', out)

                if ret:
-                   logger.error('=== flux watcher err: %s', err)
+                   logger.error('flux watcher err: %s', err)
                    break

                time.sleep(0.1)

diff --git a/src/radical/pilot/agent/scheduler/base.py b/src/radical/pilot/agent/scheduler/base.py
index e1f69b6931..9e32fe22d1 100644
--- a/src/radical/pilot/agent/scheduler/base.py
+++ 
b/src/radical/pilot/agent/scheduler/base.py @@ -457,7 +457,7 @@ def slot_status(self, msg=None): ret += glyphs[gpu] ret += '|' - # self._log.debug("==== status: %-30s: %s", msg, ret) + # self._log.debug("status: %-30s: %s", msg, ret) return ret @@ -542,7 +542,7 @@ def unschedule_cb(self, topic, msg): msg = [msg] self._queue_unsched.put(msg) - # self._log.debug('=== unscheduled from queue : %d', len(msg)) + # self._log.debug('unscheduled from queue : %d', len(msg)) # return True to keep the cb registered return True @@ -607,19 +607,19 @@ def _schedule_tasks(self): # idle if this iteration changes no state old_state = [resources, len(self._waitpool)] - # self._log.debug('==== schedule tasks 0: RX %s [%d]', resources, len(self._waitpool)) + # self._log.debug('schedule tasks 0: RX %s [%d]', resources, len(self._waitpool)) # if we have new resources, try to place waiting tasks. r_wait = False if resources: r_wait = self._schedule_waitpool() - # self._log.debug('==== scheduled tasks w: RX %s %s', resources, r_wait) + # self._log.debug('scheduled tasks w: RX %s %s', resources, r_wait) # always try to schedule newly incoming tasks # running out of resources for incoming could still mean we have # smaller slots for waiting tasks, so ignore `r` for now. r_inc = self._schedule_incoming() - # self._log.debug('==== scheduled tasks i: RX %s %s', resources, r_inc) + # self._log.debug('scheduled tasks i: RX %s %s', resources, r_inc) # if we had resources, but could not schedule any incoming not any # waiting, then we effectively ran out of *useful* resources @@ -633,7 +633,7 @@ def _schedule_tasks(self): r = self._unschedule_completed() if not resources and r: resources = True - # self._log.debug('==== scheduled tasks c: RX %s %s', resources, r) + # self._log.debug('scheduled tasks c: RX %s %s', resources, r) # idle if this iteration changes no state if old_state != [resources, len(self._waitpool)]: @@ -651,7 +651,7 @@ def _prof_sched_skip(self, task): # def _schedule_waitpool(self): - # self._log.debug("==== schedule waitpool %d waiting" % len(self._waitpool)) + # self._log.debug("schedule waitpool %d waiting" % len(self._waitpool)) resources = None # default: no change to resource state @@ -668,14 +668,14 @@ def _schedule_waitpool(self): (x['tuple_size'][0] + x['tuple_size'][2]) * x['tuple_size'][1], reverse=True) - # self._log.debug("==== schedule waitpool %d", len(tasks)) + # self._log.debug("schedule waitpool %d", len(tasks)) # cycle through waitpool, and see if we get anything placed now. 
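The waitpool sort key above, `(ts[0] + ts[2]) * ts[1]`, reads easier with the tuple unpacked. A sketch which assumes `tuple_size` packs `(cpu_processes, cpu_threads, gpu_processes)`, matching how `task['resources']` is derived elsewhere in this series:

    def waitpool_key(task):
        '''approximate task footprint used to order the waitpool'''
        procs, threads, gpus = task['tuple_size']
        return (procs + gpus) * threads

    tasks = [{'uid': 't1', 'tuple_size': (4, 2, 1)},
             {'uid': 't2', 'tuple_size': (1, 1, 0)}]
    tasks.sort(key=waitpool_key, reverse=True)     # largest footprint first
    assert [t['uid'] for t in tasks] == ['t1', 't2']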
scheduled, unscheduled = ru.lazy_bisect(tasks, check=self._try_allocation, on_skip=self._prof_sched_skip, log=self._log) - # self._log.debug("==== schedules waitpool %d", len(scheduled)) + # self._log.debug("schedules waitpool %d", len(scheduled)) # for task in scheduled: # self._prof.prof('schedule_wait', uid=task['uid']) @@ -689,7 +689,7 @@ def _schedule_waitpool(self): resources = False if scheduled: - self._log.debug('=== scheduled from waitpool: %d', len(scheduled)) + self._log.debug('scheduled from waitpool: %d', len(scheduled)) # update task resources for task in scheduled: @@ -701,7 +701,7 @@ def _schedule_waitpool(self): self.advance(scheduled, rps.AGENT_EXECUTING_PENDING, publish=True, push=True) - # self._log.debug("==== after schedule waitpool %s: %d waiting", + # self._log.debug("after schedule waitpool %s: %d waiting", # resources, len(self._waitpool)) return resources @@ -710,7 +710,7 @@ def _schedule_waitpool(self): # def _schedule_incoming(self): - # self._log.debug("==== before schedule incoming: waiting: %d", + # self._log.debug("before schedule incoming: waiting: %d", # len(self._waitpool)) resources = None # n o change in resource status @@ -734,7 +734,7 @@ def _schedule_incoming(self): if len(tasks) > CHUNKSIZE: # stop collecting, schedule what we have, only continue # here when all tasks can be scheduled - # self._log.debug('==== break for chunk %d', CHUNKSIZE) + # self._log.debug('break for chunk %d', CHUNKSIZE) break except queue.Empty: @@ -742,11 +742,11 @@ def _schedule_incoming(self): pass if not tasks: - # self._log.debug('==== return for empty queue') + # self._log.debug('return for empty queue') # no resource change, no activity return None - self._log.debug("==== schedule incoming [%d]", len(tasks)) + self._log.debug("schedule incoming [%d]", len(tasks)) # handle largest tasks first # FIXME: this needs lazy-bisect @@ -782,8 +782,8 @@ def _schedule_incoming(self): # if tasks remain waiting, we are out of usable resources resources = False - self._log.debug('=== unscheduled incoming: %d', len(scheduled)) - self._log.debug('=== scheduled incoming: %d', len(unscheduled)) + self._log.debug('unscheduled incoming: %d', len(scheduled)) + self._log.debug('scheduled incoming: %d', len(unscheduled)) # if we could not schedule any task from the last chunk, then we # should break to allow the unschedule to kick in @@ -792,7 +792,7 @@ def _schedule_incoming(self): if unscheduled: break - self._log.debug("==== after schedule incoming: waiting: %d", + self._log.debug("after schedule incoming: waiting: %d", len(self._waitpool)) return resources @@ -802,7 +802,7 @@ def _schedule_incoming(self): # def _unschedule_completed(self): - # self._log.debug("==== unschedule completed") + # self._log.debug("unschedule completed") to_unschedule = list() try: @@ -822,7 +822,7 @@ def _unschedule_completed(self): while not self._proc_term.is_set(): tasks = self._queue_unsched.get(timeout=0.001) to_unschedule.extend(tasks) - # self._log.debug('=== unscheduled to batch : %d', len(tasks)) + # self._log.debug('unscheduled to batch : %d', len(tasks)) if len(to_unschedule) > CHUNKSIZE: break @@ -830,13 +830,13 @@ def _unschedule_completed(self): # no more unschedule requests pass - # self._log.debug("==== unschedule completed %d", len(to_unschedule)) + # self._log.debug("unschedule completed %d", len(to_unschedule)) if to_unschedule: # rebuild the tuple_size binning, maybe - self._log.debug('=== unscheduled refresh : %d', len(to_unschedule)) + self._log.debug('unscheduled refresh : %d', 
len(to_unschedule)) self._refresh_ts_map() - self._log.debug('=== unscheduled refreshed : %d', len(to_unschedule)) + self._log.debug('unscheduled refreshed : %d', len(to_unschedule)) to_release = list() # unscheduling tasks to release slots from @@ -896,7 +896,7 @@ def _unschedule_completed(self): to_advance.append(replace) if to_advance: - self._log.debug('=== unscheduled advance : %d', len(to_advance)) + self._log.debug('unscheduled advance : %d', len(to_advance)) self.advance(to_advance, rps.AGENT_EXECUTING_PENDING, publish=True, push=True) @@ -904,21 +904,21 @@ def _unschedule_completed(self): # we have tasks to unschedule, which will free some resources. We can # thus try to schedule larger tasks again, and also inform the caller # about resource availability. - # self._log.debug("==== release completed %d", len(to_release)) + # self._log.debug("release completed %d", len(to_release)) if to_release: for task in to_release: self.unschedule_task(task) self._prof.prof('unschedule_stop', uid=task['uid']) - self._log.debug('=== unscheduled release : %d', len(to_release)) + self._log.debug('unscheduled release : %d', len(to_release)) # if previously waiting tasks were placed, remove them from the waitpool - # self._log.debug("==== scheduled completed %d", len(placed)) + # self._log.debug("scheduled completed %d", len(placed)) if placed: for uid in placed: del(self._waitpool[uid]) - # self._log.debug("=== unscheduled and replaced : %d / %d", len(to_unschedule), len(placed)) + # self._log.debug("unscheduled and replaced : %d / %d", len(to_unschedule), len(placed)) if to_release: return True # new resources else : return False diff --git a/src/radical/pilot/agent/scheduler/continuous.py b/src/radical/pilot/agent/scheduler/continuous.py index 85c9a8cfa2..431f7386e5 100644 --- a/src/radical/pilot/agent/scheduler/continuous.py +++ b/src/radical/pilot/agent/scheduler/continuous.py @@ -221,7 +221,7 @@ def _find_resources(self, node, find_slots, cores_per_slot, gpus_per_slot, thread count and using physical core IDs for process placement? ''' - # self._log.debug('=== find on %s: %s * [%s, %s]', node['uid'], ) + # self._log.debug('find on %s: %s * [%s, %s]', node['uid'], ) # check if the node can host the request free_cores = node['cores'].count(rpc.FREE) diff --git a/src/radical/pilot/configs/pmgr_default.json b/src/radical/pilot/configs/pmgr_default.json index 2496d3ef83..c2430a1cb4 100644 --- a/src/radical/pilot/configs/pmgr_default.json +++ b/src/radical/pilot/configs/pmgr_default.json @@ -15,6 +15,11 @@ "pmgr_launching_queue" : {"kind" : "queue", "log_level" : "error", "stall_hwm" : 0, + "bulk_size" : 0}, + + "pmgr_comm_pubsub" : {"kind" : "pubsub", + "log_level" : "debug", + "stall_hwm" : 0, "bulk_size" : 0} }, diff --git a/src/radical/pilot/configs/tmgr_default.json b/src/radical/pilot/configs/tmgr_default.json index 6ad3bd533f..c152c8ca5b 100644 --- a/src/radical/pilot/configs/tmgr_default.json +++ b/src/radical/pilot/configs/tmgr_default.json @@ -23,11 +23,11 @@ "log_level" : "error", "stall_hwm" : 0, "bulk_size" : 0}, + "tmgr_staging_output_queue" : {"kind" : "queue", "log_level" : "error", "stall_hwm" : 0, "bulk_size" : 0}, - "tmgr_unschedule_pubsub" : {"kind" : "pubsub", "log_level" : "error", "stall_hwm" : 0, diff --git a/src/radical/pilot/pmgr/launching/default.py b/src/radical/pilot/pmgr/launching/default.py index eac43b24cd..478381d18c 100644 --- a/src/radical/pilot/pmgr/launching/default.py +++ b/src/radical/pilot/pmgr/launching/default.py @@ -17,9 +17,11 @@ from ... 
import states as rps from ... import constants as rpc +from ... import utils as rpu from .base import PMGRLaunchingComponent + from ...staging_directives import complete_url, expand_staging_directives @@ -77,7 +79,7 @@ def initialize(self): self.register_timed_cb(self._pilot_watcher_cb, timer=10.0) # we listen for pilot cancel commands - self.register_subscriber(rpc.CONTROL_PUBSUB, self._pmgr_control_cb) + self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb) # also listen for completed staging directives self.register_subscriber(rpc.STAGER_RESPONSE_PUBSUB, self._staging_ack_cb) @@ -89,6 +91,9 @@ def initialize(self): self._rp_version, _, _, _, self._rp_sdist_name, self._rp_sdist_path = \ ru.get_version([self._mod_dir, self._root_dir]) + # create our own bridges to communicate with the pilots + self._bridges = list() + # -------------------------------------------------------------------------- # @@ -119,7 +124,14 @@ def finalize(self): # -------------------------------------------------------------------------- # - def _pmgr_control_cb(self, topic, msg): + def _pmgr_comm_cb(self, topic, msg): + + self._log.debug('comm msg: %s', msg) + + + # -------------------------------------------------------------------------- + # + def _control_cb(self, topic, msg): cmd = msg['cmd'] arg = msg['arg'] @@ -152,157 +164,6 @@ def _pmgr_control_cb(self, topic, msg): return True - # -------------------------------------------------------------------------- - # - def _handle_pilot_input_staging(self, pilot, sds): - - pid = pilot['uid'] - - # NOTE: no task sandboxes defined! - src_context = {'pwd' : pilot['client_sandbox'], - 'pilot' : pilot['pilot_sandbox'], - 'resource': pilot['resource_sandbox']} - tgt_context = {'pwd' : pilot['pilot_sandbox'], - 'pilot' : pilot['pilot_sandbox'], - 'resource': pilot['resource_sandbox']} - - # Iterate over all directives - for sd in sds: - - # TODO: respect flags in directive - - action = sd['action'] - flags = sd['flags'] - did = sd['uid'] - src = sd['source'] - tgt = sd['target'] - - assert(action in [rpc.COPY, rpc.LINK, rpc.MOVE, rpc.TRANSFER]) - - self._prof.prof('staging_in_start', uid=pid, msg=did) - - src = complete_url(src, src_context, self._log) - tgt = complete_url(tgt, tgt_context, self._log) - - if action in [rpc.COPY, rpc.LINK, rpc.MOVE]: - self._prof.prof('staging_in_fail', uid=pid, msg=did) - raise ValueError("invalid action '%s' on pilot level" % action) - - self._log.info('transfer %s to %s', src, tgt) - - # FIXME: make sure that tgt URL points to the right resource - # FIXME: honor sd flags if given (recursive...) - flags = rsfs.CREATE_PARENTS - - if os.path.isdir(src.path): - flags |= rsfs.RECURSIVE - - # Define and open the staging directory for the pilot - # We use the target dir construct here, so that we can create - # the directory if it does not yet exist. 
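The staging helpers removed here, like their counterparts elsewhere in this component, share one caching idiom: a single `rsfs.Directory` per remote filesystem, keyed on the sandbox URL with its path stripped. Condensed into a standalone helper (a sketch only; the component keeps the cache and lock on `self`):

    import threading

    import radical.saga as rs
    rsfs = rs.filesystem

    _cache_lock    = threading.RLock()
    _saga_fs_cache = dict()

    def get_fs_dir(sandbox_url, session):
        '''
        Return a cached rsfs.Directory for the filesystem which hosts
        `sandbox_url`.  The key drops the path, so all sandboxes on one
        endpoint share a single connection.
        '''
        fs_url      = rs.Url(sandbox_url)
        fs_url.path = '/'
        key         = str(fs_url)

        with _cache_lock:
            if key not in _saga_fs_cache:
                _saga_fs_cache[key] = rsfs.Directory(fs_url,
                                                     session=session)
            return _saga_fs_cache[key]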
- - # url used for cache (sandbox url w/o path) - fs_url = rs.Url(pilot['pilot_sandbox']) - fs_url.path = '/' - key = str(fs_url) - - self._log.debug("rs.file.Directory ('%s')", key) - - with self._cache_lock: - if key in self._saga_fs_cache: - fs = self._saga_fs_cache[key] - - else: - fs = rsfs.Directory(fs_url, session=self._session) - self._saga_fs_cache[key] = fs - - fs.copy(src, tgt, flags=flags) - - sd['state'] = rps.DONE - - self._prof.prof('staging_in_stop', uid=pid, msg=did) - - self.publish(rpc.CONTROL_PUBSUB, {'cmd': 'pilot_staging_input_result', - 'arg': {'pilot': pilot, - 'sds' : sds}}) - - - # -------------------------------------------------------------------------- - # - def _handle_pilot_output_staging(self, pilot, sds): - - pid = pilot['uid'] - - # NOTE: no task sandboxes defined! - src_context = {'pwd' : pilot['pilot_sandbox'], - 'pilot' : pilot['pilot_sandbox'], - 'resource': pilot['resource_sandbox']} - tgt_context = {'pwd' : pilot['client_sandbox'], - 'pilot' : pilot['pilot_sandbox'], - 'resource': pilot['resource_sandbox']} - - # Iterate over all directives - for sd in sds: - - try: - - action = sd['action'] - flags = sd['flags'] - did = sd['uid'] - src = sd['source'] - tgt = sd['target'] - - assert(action in [rpc.COPY, rpc.LINK, rpc.MOVE, rpc.TRANSFER]) - - self._prof.prof('staging_out_start', uid=pid, msg=did) - - if action in [rpc.COPY, rpc.LINK, rpc.MOVE]: - raise ValueError("invalid pilot action '%s'" % action) - - src = complete_url(src, src_context, self._log) - tgt = complete_url(tgt, tgt_context, self._log) - - self._log.info('transfer %s to %s', src, tgt) - - # FIXME: make sure that tgt URL points to the right resource - # FIXME: honor sd flags if given (recursive...) - flags = rsfs.CREATE_PARENTS - - if os.path.isdir(src.path): - flags |= rsfs.RECURSIVE - - # Define and open the staging directory for the pilot - - # url used for cache (sandbox url w/o path) - fs_url = rs.Url(pilot['pilot_sandbox']) - fs_url.path = '/' - key = str(fs_url) - - with self._cache_lock: - if key in self._saga_fs_cache: - fs = self._saga_fs_cache[key] - - else: - fs = rsfs.Directory(fs_url, session=self._session) - self._saga_fs_cache[key] = fs - - fs.copy(src, tgt, flags=flags) - - sd['state'] = rps.DONE - self._prof.prof('staging_out_stop', uid=pid, msg=did) - - except: - self._log.exception('pilot level staging failed') - self._prof.prof('staging_out_fail', uid=pid, msg=did) - sd['state'] = rps.FAILED - - - self.publish(rpc.CONTROL_PUBSUB, - {'cmd': 'pilot_staging_output_result', - 'arg': {'pilot': pilot, - 'sds' : [sd]}}) - - # -------------------------------------------------------------------------- # def _pilot_watcher_cb(self): @@ -677,7 +538,7 @@ def _start_pilot_bulk(self, resource, schema, pilots): # direct staging, use first pilot for staging context # NOTE: this implies that the SDS can only refer to session # sandboxes, not to pilot sandboxes! - self._log.debug('==== %s', info['sds']) + self._log.debug(info['sds']) self._stage_in(pilots[0], info['sds']) for ft in ft_list: @@ -1129,6 +990,49 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): agent_cfg['resource_cfg'] = copy.deepcopy(rcfg) agent_cfg['debug'] = self._log.getEffectiveLevel() + + # the pilot also gets contact points points for some client side + # communication channels. If the pilot happens to be able to + # connect to them, they will get used for communication + # - otherwise we fall back to MongoDB. 
+ # + # We will need a separate queue for each pilot from which the + # pilot can pull tasks, so that queue cannot be defined in + # a static config file. Instead we start it here and own that + # queue for as long as the agent lives (in practice, for as long + # as this pmgr lives). + + bcfg = ru.Config(cfg={'channel' : '%s.in' % pid, + 'type' : 'queue', + 'stall_hwm' : 1, + 'bulk_size' : 0, + 'path' : self._cfg.path}) + b_in = ru.zmq.Queue(bcfg) + b_in.start() + self._bridges.append(b_in) + + bcfg = ru.Config(cfg={'channel' : '%s.out' % pid, + 'type' : 'queue', + 'stall_hwm' : 1, + 'bulk_size' : 0, + 'path' : self._cfg.path}) + b_out = ru.zmq.Queue(bcfg) + b_out.start() + self._bridges.append(b_out) + + self.register_subscriber('pmgr_comm_pubsub', self._pmgr_comm_cb) + + comm_cfg = ru.read_json('%s/pmgr_comm_pubsub.cfg' % self._cfg.path) + + agent_cfg['pilot_comm'] = { + 'input' : {'channel': b_in.channel, + 'put' : str(b_in.addr_put), + 'get' : str(b_in.addr_get)}, + 'output' : {'channel': b_out.channel, + 'put' : str(b_out.addr_put), + 'get' : str(b_out.addr_get)}, + 'comm' : comm_cfg} + # we'll also push the agent config into MongoDB pilot['cfg'] = agent_cfg @@ -1145,7 +1049,7 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): # always stage agent cfg for each pilot, not in the tarball # FIXME: purge the tmp file after staging - self._log.debug('=== cfg %s -> %s', agent_cfg['pid'], pilot_sandbox) + self._log.debug('cfg %s -> %s', agent_cfg['pid'], pilot_sandbox) ret['sds'].append({'source': cfg_tmp_file, 'target': '%s/%s' % (pilot['pilot_sandbox'], agent_cfg_name), 'action': rpc.TRANSFER}) @@ -1274,11 +1178,6 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): 'site:%s/%s > %s' % (session_sandbox, sdist, sdist) ]) - if stage_cacerts: - jd.file_transfer.extend([ - 'site:%s/%s > %s' % (session_sandbox, certs, certs) - ]) - self._log.debug("Bootstrap command line: %s %s", jd.executable, jd.arguments) ret['jd'] = jd diff --git a/src/radical/pilot/raptor/master.py b/src/radical/pilot/raptor/master.py index 0fa52187f1..0c4ca3ae1b 100644 --- a/src/radical/pilot/raptor/master.py +++ b/src/radical/pilot/raptor/master.py @@ -381,7 +381,7 @@ def _run(self): # subscrive to state updates on the update pubsub? 
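The per-pilot bridge setup above condenses to: two `ru.zmq.Queue` bridges per pilot, with their endpoints recorded for the agent config. A sketch; `make_pilot_queues` is a hypothetical helper, and the config keys mirror the `bcfg` used in this patch:

    import radical.utils as ru

    def make_pilot_queues(pid, path):
        '''create a pilot's input/output bridges, return their endpoints'''
        endpoints = dict()
        for direction in ['in', 'out']:
            bcfg   = ru.Config(cfg={'channel'  : '%s.%s' % (pid, direction),
                                    'type'     : 'queue',
                                    'stall_hwm': 1,
                                    'bulk_size': 0,
                                    'path'     : path})
            bridge = ru.zmq.Queue(bcfg)
            bridge.start()
            endpoints[direction] = {'channel': bridge.channel,
                                    'put'    : str(bridge.addr_put),
                                    'get'    : str(bridge.addr_get)}
        return endpoints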
time.sleep(1.0) - self._log.debug('=== master term') + self._log.debug('master term') # -------------------------------------------------------------------------- @@ -405,7 +405,7 @@ def request(self, reqs): objs.append(request) # push the request message (as dictionary) onto the request queue - self._log.debug('=== put %d: [%s]', len(dicts), + self._log.debug('put %d: [%s]', len(dicts), [r['uid'] for r in dicts]) self._req_put.put(dicts) @@ -455,7 +455,7 @@ def terminate(self): self._term.set() for uid in self._workers: - self._log.debug('=== master %s sends term to %s', self._uid, uid) + self._log.debug('master %s sends term to %s', self._uid, uid) self.publish(rpc.CONTROL_PUBSUB, {'cmd': 'worker_terminate', 'arg': {'uid': uid}}) @@ -465,10 +465,10 @@ def terminate(self): states = [self._workers[uid]['state'] for uid in uids] if set(states) == {'DONE'}: break - self._log.debug('=== states: %s', states) + self._log.debug('states: %s', states) time.sleep(1) - self._log.debug('=== all workers terminated') + self._log.debug('all workers terminated') # ------------------------------------------------------------------------------ diff --git a/src/radical/pilot/raptor/worker.py b/src/radical/pilot/raptor/worker.py index f419aadf9e..aacc3aa974 100644 --- a/src/radical/pilot/raptor/worker.py +++ b/src/radical/pilot/raptor/worker.py @@ -389,10 +389,10 @@ def _request_cb(self, tasks): invoke them. ''' - self._log.debug('=== req_loop %s', len(ru.as_list(tasks))) + self._log.debug('req_loop %s', len(ru.as_list(tasks))) for task in ru.as_list(tasks): - self._log.debug('=== req_recv %s', task['uid']) + self._log.debug('req_recv %s', task['uid']) task['worker'] = self._uid try: @@ -400,14 +400,14 @@ def _request_cb(self, tasks): # many cpus and gpus we need to mark as busy while not self._alloc_task(task): - self._log.debug('=== req_alloc %s', task['uid']) + self._log.debug('req_alloc %s', task['uid']) # no resource - wait for new resources # # NOTE: this will block smaller tasks from being executed # right now. alloc_task is not a proper scheduler, # after all. 
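As the NOTE above says, `alloc_task` is not a proper scheduler - it is plain counting, retried in a busy-wait. A stand-in sketch of that bookkeeping, assuming tasks carry flat `cores`/`gpus` counts (the real worker tracks more state than this):

    import threading

    class SlotAlloc(object):
        '''count-based allocation: place a task when enough is free'''

        def __init__(self, cores, gpus):
            self._lock  = threading.Lock()
            self._cores = cores
            self._gpus  = gpus

        def alloc(self, task):
            with self._lock:
                if task['cores'] > self._cores: return False
                if task['gpus']  > self._gpus : return False
                self._cores -= task['cores']
                self._gpus  -= task['gpus']
                return True

        def dealloc(self, task):
            with self._lock:
                self._cores += task['cores']
                self._gpus  += task['gpus']

A caller which fails to `alloc` sleeps and retries, exactly as the request loop above does; `dealloc` runs when results are collected.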
# while not self._res_evt.wait(timeout=1.0): - # self._log.debug('=== req_alloc_wait %s', task['uid']) + # self._log.debug('req_alloc_wait %s', task['uid']) time.sleep(0.01) @@ -418,7 +418,7 @@ def _request_cb(self, tasks): self._res_evt.clear() - self._log.debug('=== req_alloced %s', task['uid']) + self._log.debug('req_alloced %s', task['uid']) self._prof.prof('req_start', uid=task['uid'], msg=self._uid) # we got an allocation for this task, and can run it, so apply @@ -443,7 +443,7 @@ def _request_cb(self, tasks): self._log.debug('applied: %s: %s: %s', task['uid'], proc.pid, self._pool.keys()) - self._log.debug('=== req_started %s: %s', task['uid'], proc.pid) + self._log.debug('req_started %s: %s', task['uid'], proc.pid) except Exception as e: @@ -460,7 +460,7 @@ def _request_cb(self, tasks): self._res_put.put(res) - self._log.debug('=== req_looped') + self._log.debug('req_looped') def _after_fork(): @@ -495,7 +495,7 @@ def _dispatch_thread(): mode = task['mode'] assert(mode in self._modes), 'no such call mode %s' % mode - self._log.debug('=== debug %s: %s', task['uid'], task) + self._log.debug('debug %s: %s', task['uid'], task) tout = task.get('timeout') self._log.debug('dispatch with tout %s', tout) @@ -507,9 +507,9 @@ def _dispatch_thread(): # dispatcher = mp.Process(target=_dispatch_thread) # dispatcher.daemon = True # dispatcher.start() - # self._log.debug('=== join %s: %s', task['uid'], task) + # self._log.debug('join %s: %s', task['uid'], task) # dispatcher.join(timeout=tout) - # self._log.debug('=== joined %s: %s', task['uid'], tout) + # self._log.debug('joined %s: %s', task['uid'], tout) # # if dispatcher.is_alive(): # dispatcher.kill() @@ -551,7 +551,7 @@ def _result_watcher(self): try: while not self._term.is_set(): - # self._log.debug('=== waiting for results') + # self._log.debug('waiting for results') try: res = self._result_queue.get(timeout=0.1) @@ -565,7 +565,7 @@ def _result_watcher(self): raise finally: - self._log.debug('=== send unregister') + self._log.debug('send unregister') if self._cfg['rank'] == 0: self.publish(rpc.CONTROL_PUBSUB, {'cmd': 'worker_unregister', 'arg': {'uid' : self._cfg['wid']}}) diff --git a/src/radical/pilot/task_description.py b/src/radical/pilot/task_description.py index 5e61844b34..c1d3e0ce32 100644 --- a/src/radical/pilot/task_description.py +++ b/src/radical/pilot/task_description.py @@ -292,7 +292,7 @@ class TaskDescription(ru.Description): Staging Directives - ================== + ------------------ The Staging Directives are specified using a dict in the following form: diff --git a/src/radical/pilot/task_manager.py b/src/radical/pilot/task_manager.py index b5b94fe5d2..5169c18b00 100644 --- a/src/radical/pilot/task_manager.py +++ b/src/radical/pilot/task_manager.py @@ -699,7 +699,9 @@ def add_pilots(self, pilots): # sanity check, and keep pilots around for inspection for pilot in pilots: + pid = pilot.uid + if pid in self._pilots: raise ValueError('pilot %s already added' % pid) self._pilots[pid] = pilot @@ -707,9 +709,8 @@ def add_pilots(self, pilots): # subscribe for state updates pilot.register_callback(self._pilot_state_cb) - pilot_docs = [pilot.as_dict() for pilot in pilots] - # publish to the command channel for the scheduler to pick up + pilot_docs = [pilot.as_dict() for pilot in pilots] self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'add_pilots', 'arg' : {'pilots': pilot_docs, 'tmgr' : self.uid}}) diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py index 
d563206954..2218ec9d10 100644 --- a/src/radical/pilot/tmgr/staging_input/default.py +++ b/src/radical/pilot/tmgr/staging_input/default.py @@ -10,7 +10,7 @@ import radical.utils as ru import radical.saga as rs -rs.fs = rs.filesystem +rsfs = rs.filesystem from ... import states as rps from ... import constants as rpc @@ -62,12 +62,15 @@ def initialize(self): self.register_input(rps.TMGR_STAGING_INPUT_PENDING, rpc.TMGR_STAGING_INPUT_QUEUE, self.work) - # FIXME: this queue is inaccessible, needs routing via mongodb + # this queue is inaccessible, needs routing via mongodb self.register_output(rps.AGENT_STAGING_INPUT_PENDING, None) + # alternatively, we keep a registry of pilot specific queues to route + self._pilot_queues = dict() # pid : zmq.Putter + # we subscribe to the command channel to learn about pilots being added # to this task manager. - self.register_subscriber(rpc.CONTROL_PUBSUB, self._base_command_cb) + self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb) # -------------------------------------------------------------------------- @@ -80,7 +83,7 @@ def finalize(self): # -------------------------------------------------------------------------- # - def _base_command_cb(self, topic, msg): + def _control_cb(self, topic, msg): # keep track of `add_pilots` commands and updates self._pilots # accordingly. @@ -88,20 +91,30 @@ def _base_command_cb(self, topic, msg): cmd = msg.get('cmd') arg = msg.get('arg') - if cmd not in ['add_pilots']: - self._log.debug('skip cmd %s', cmd) + if cmd == 'add_pilots': + + pilots = arg.get('pilots', []) + + if not isinstance(pilots, list): + pilots = [pilots] - pilots = arg.get('pilots', []) + with self._pilots_lock: + for pilot in pilots: + pid = pilot['uid'] + self._log.debug('add pilot %s', pid) + if pid not in self._pilots: + self._pilots[pid] = pilot + + elif cmd == 'pilot_connect': - if not isinstance(pilots, list): - pilots = [pilots] + pid = arg['pid'] + cfg = arg['cfg'] - with self._pilots_lock: - for pilot in pilots: - pid = pilot['uid'] - self._log.debug('add pilot %s', pid) - if pid not in self._pilots: - self._pilots[pid] = pilot + self._pilot_queues[pid] = ru.zmq.Putter(cfg['input']['channel'], + cfg['input']['put']) + + else: + self._log.debug('skip cmd %s', cmd) return True @@ -116,11 +129,15 @@ def work(self, tasks): # advance them again as a bulk. We work over the others one by one, and # advance them individually, to avoid stalling from slow staging ops. - no_staging_tasks = list() - staging_tasks = list() + staging_tasks = dict() # pid: [tasks] + no_staging_tasks = dict() # pid: [tasks] for task in tasks: + pid = task['pilot'] + if pid not in staging_tasks : staging_tasks[pid] = list() + if pid not in no_staging_tasks: no_staging_tasks[pid] = list() + # no matter if we perform any staging or not, we will push the full # task info to the DB on the next advance, and will pass control to # the agent. @@ -135,9 +152,9 @@ def work(self, tasks): actionables.append(sd) if actionables: - staging_tasks.append([task, actionables]) + staging_tasks[pid].append([task, actionables]) else: - no_staging_tasks.append(task) + no_staging_tasks[pid].append(task) # Optimization: if we obtained a large bulk of tasks, we at this point # attempt a bulk mkdir for the task sandboxes, to free the agent of @@ -162,17 +179,16 @@ def work(self, tasks): # to do about the pilot configuration (sandbox, access schema, etc), so # we only attempt this optimization for tasks scheduled to pilots for # which we learned those details. 
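The tar-based bulk `mkdir` introduced around this hunk replaces many remote `mkdir` round trips with one tarball transfer plus one remote untar. A sketch of the packing side, assuming sandbox paths are given relative to the session sandbox:

    import os
    import tarfile
    import tempfile

    def sandbox_tarball(sboxes):
        '''pack empty directory entries for all task sandboxes'''
        fd, tar_path = tempfile.mkstemp(suffix='.tar')
        os.close(fd)
        with tarfile.open(tar_path, 'w') as tar:
            for sbox in sboxes:
                info      = tarfile.TarInfo(name=sbox)
                info.type = tarfile.DIRTYPE
                info.mode = 0o755
                tar.addfile(info)
        return tar_path

One copy of the resulting file and a single `tar xf` on the target side then create all sandboxes at once.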
- task_sboxes_by_pid = dict() - for task in no_staging_tasks: - sbox = task['task_sandbox'] - pid = task['pilot'] - if pid not in task_sboxes_by_pid: - task_sboxes_by_pid[pid] = list() - task_sboxes_by_pid[pid].append(sbox) + sboxes = dict() # pid: [sboxes] + for pid in no_staging_tasks: + for task in no_staging_tasks[pid]: + if pid not in sboxes: + sboxes[pid] = list() + sboxes[pid].append(task['task_sandbox']) # now trigger the bulk mkdir for all filesystems which have more than # a certain tasks tohandle in this bulk: - for pid in task_sboxes_by_pid: + for pid in sboxes: with self._pilots_lock: pilot = self._pilots.get(pid) @@ -182,11 +198,12 @@ def work(self, tasks): self._log.debug('pid unknown - skip optimizion', pid) continue - session_sbox = self._session._get_session_sandbox(pilot) - task_sboxes = task_sboxes_by_pid[pid] + task_sboxes = sboxes[pid] if len(task_sboxes) >= TASK_BULK_MKDIR_THRESHOLD: + session_sbox = self._session._get_session_sandbox(pilot) + self._log.debug('tar %d sboxes', len(task_sboxes)) # no matter the bulk mechanism, we need a SAGA handle to the @@ -196,7 +213,7 @@ def work(self, tasks): sbox_fs_str = str(sbox_fs) if sbox_fs_str not in self._fs_cache: self._fs_cache[sbox_fs_str] = \ - rs.fs.Directory(sbox_fs, session=self._session) + rsfs.Directory(sbox_fs, session=self._session) saga_dir = self._fs_cache[sbox_fs_str] # we have two options for a bulk mkdir: @@ -238,7 +255,7 @@ def work(self, tasks): type(session_sbox)) self._log.debug('copy: %s -> %s', tar_url, tar_rem_path) saga_dir.copy(tar_url, tar_rem_path, - flags=rs.fs.CREATE_PARENTS) + flags=rsfs.CREATE_PARENTS) # get a job service handle to the target resource and run # the untar command. Use the hop to skip the batch system @@ -261,14 +278,18 @@ def work(self, tasks): j.exit_code) - if no_staging_tasks: + for pid in no_staging_tasks: - # nothing to stage, push to the agent - self.advance(no_staging_tasks, rps.AGENT_STAGING_INPUT_PENDING, - publish=True, push=True) + self.advance(no_staging_tasks[pid], rps.AGENT_STAGING_INPUT_PENDING, + publish=True, push=False) - for task,actionables in staging_tasks: - self._handle_task(task, actionables) + if pid in self._pilot_queues: + self._pilot_queues[pid].put(no_staging_tasks[pid]) + + for pid in staging_tasks: + + for task,actionables in staging_tasks[pid]: + self._handle_task(task, actionables) # -------------------------------------------------------------------------- @@ -301,10 +322,10 @@ def _handle_task(self, task, actionables): self._log.debug('key %s / %s', key, tmp) if key not in self._fs_cache: - self._fs_cache[key] = rs.fs.Directory(tmp, session=self._session) + self._fs_cache[key] = rsfs.Directory(tmp, session=self._session) saga_dir = self._fs_cache[key] - saga_dir.make_dir(sandbox, flags=rs.fs.CREATE_PARENTS) + saga_dir.make_dir(sandbox, flags=rsfs.CREATE_PARENTS) self._prof.prof("create_sandbox_stop", uid=uid) # Loop over all transfer directives and filter out tarball staging @@ -380,10 +401,10 @@ def _handle_task(self, task, actionables): # Check if the src is a folder, if true # add recursive flag if not already specified if os.path.isdir(src.path): - flags |= rs.fs.RECURSIVE + flags |= rsfs.RECURSIVE # Always set CREATE_PARENTS - flags |= rs.fs.CREATE_PARENTS + flags |= rsfs.CREATE_PARENTS src = complete_url(src, src_context, self._log) tgt = complete_url(tgt, tgt_context, self._log) @@ -406,6 +427,10 @@ def _handle_task(self, task, actionables): self.advance(task, rps.AGENT_STAGING_INPUT_PENDING, publish=True, push=True) + pid = 
task['pilot'] + if pid in self._pilot_queues: + self._pilot_queues[pid].put([task]) + # ------------------------------------------------------------------------------ diff --git a/src/radical/pilot/tmgr/staging_output/default.py b/src/radical/pilot/tmgr/staging_output/default.py index c66df5ee64..4337eda6f5 100644 --- a/src/radical/pilot/tmgr/staging_output/default.py +++ b/src/radical/pilot/tmgr/staging_output/default.py @@ -5,7 +5,8 @@ import os -import radical.saga as rs +import radical.saga as rs +import radical.utils as ru from ... import states as rps from ... import constants as rpc @@ -42,8 +43,15 @@ def initialize(self): self.register_input(rps.TMGR_STAGING_OUTPUT_PENDING, rpc.TMGR_STAGING_OUTPUT_QUEUE, self.work) - # we don't need an output queue -- tasks will be final + # we also listen on individual pilot queues + self._pilot_queues = dict() # pid : zmq.Getter + + # we subscribe to the command channel to learn about pilots being added + # to this task manager. + self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb) + # we don't need an output queue -- tasks will be final + # # -------------------------------------------------------------------------- # @@ -55,10 +63,29 @@ def finalize(self): # -------------------------------------------------------------------------- # - def work(self, tasks): + def _control_cb(self, topic, msg): + + cmd = msg.get('cmd') + arg = msg.get('arg') + + if cmd == 'pilot_connect': + + pid = arg['pid'] + cfg = arg['cfg'] - if not isinstance(tasks, list): - tasks = [tasks] + self._pilot_queues[pid] = ru.zmq.Getter(cfg['output']['channel'], + cfg['output']['get'], + self.work) + + else: + self._log.debug('skip cmd %s', cmd) + + return True + + + # -------------------------------------------------------------------------- + # + def work(self, tasks): self.advance(tasks, rps.TMGR_STAGING_OUTPUT, publish=True, push=False) diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index 2e56d4bbf4..de6d9ef924 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -1029,7 +1029,7 @@ def register_subscriber(self, pubsub, cb): log=self._log, prof=self._prof) - self._subscribers[pubsub].subscribe(topic=pubsub, cb=cb, + self._subscribers[pubsub].subscribe(topic=ru.as_string(pubsub), cb=cb, lock=self._cb_lock) From 2ff74d0ada5014091ebe4f0142549a63ca487dc9 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Thu, 18 Feb 2021 10:52:26 +0100 Subject: [PATCH 009/171] var rename --- src/radical/pilot/agent/executing/sleep.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/radical/pilot/agent/executing/sleep.py b/src/radical/pilot/agent/executing/sleep.py index 5974fc9c0e..6c8735cc45 100644 --- a/src/radical/pilot/agent/executing/sleep.py +++ b/src/radical/pilot/agent/executing/sleep.py @@ -77,7 +77,7 @@ def work(self, tasks): self._prof.prof('task_exec_start', uid=uid) self._prof.prof('app_start', uid=uid) - t['to_finish'] = now + float(t['description']['arguments'][0]) + t['deadline'] = now + float(t['description']['arguments'][0]) self._log.debug('started new tasks : %d', len(tasks)) @@ -98,8 +98,8 @@ def _collect(self): now = time.time() for task in self._tasks: - if task['to_finish'] <= now: to_finish.append(task) - else : to_continue.append(task) + if task['deadline'] <= now: to_finish.append(task) + else : to_continue.append(task) self._tasks = to_continue From 05ae125e7ef816823420a00c230113de0f7c04b1 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Thu, 25 
Feb 2021 21:51:32 +0100 Subject: [PATCH 010/171] snapshot --- examples/01_task_details.py | 13 +- src/radical/pilot/agent/agent_0.py | 126 +++++++++++++----- .../pilot/agent/staging_output/default.py | 10 +- src/radical/pilot/configs/agent_default.json | 45 ++----- src/radical/pilot/configs/pmgr_default.json | 12 +- .../pilot/configs/session_default.json | 29 +--- src/radical/pilot/configs/tmgr_default.json | 26 +--- src/radical/pilot/constants.py | 2 + src/radical/pilot/pilot_manager.py | 3 +- src/radical/pilot/pmgr/launching/default.py | 64 ++------- .../pilot/tmgr/staging_input/default.py | 77 ++++++----- .../pilot/tmgr/staging_output/default.py | 37 +---- src/radical/pilot/utils/component.py | 9 +- 13 files changed, 199 insertions(+), 254 deletions(-) diff --git a/examples/01_task_details.py b/examples/01_task_details.py index 5c10a1ba8b..7b88bcdc22 100755 --- a/examples/01_task_details.py +++ b/examples/01_task_details.py @@ -5,6 +5,7 @@ import os import sys +import time verbose = os.environ.get('RADICAL_PILOT_VERBOSE', 'REPORT') os.environ['RADICAL_PILOT_VERBOSE'] = verbose @@ -45,13 +46,14 @@ # read the config used for resource details report.info('read config') - config = ru.read_json('%s/config.json' % os.path.dirname(os.path.abspath(__file__))) + config = ru.read_json('%s/config.json' % os.path.dirname(__file__)) report.ok('>>ok\n') report.header('submit pilots') # Add a PilotManager. PilotManagers manage one or more pilots. pmgr = rp.PilotManager(session=session) + tmgr = rp.TaskManager(session=session) # Define an [n]-core local pilot that runs for [x] minutes # Here we use a dict to initialize the description object @@ -68,17 +70,14 @@ # Launch the pilot. pilot = pmgr.submit_pilots(pdesc) - - report.header('submit tasks') # Register the pilot in a TaskManager object. - tmgr = rp.TaskManager(session=session) tmgr.add_pilots(pilot) # Create a workload of tasks. # Each task runs '/bin/date'. - n = 128 # number of tasks to run + n = 1024 # number of tasks to run report.info('create %d task description(s)\n\t' % n) tds = list() @@ -106,7 +105,7 @@ for task in tasks: report.plain(' * %s: %s, exit: %3s, out: %s' % (task.uid, task.state[:4], - task.exit_code, task.stdout[:35])) + task.exit_code, task.stdout)) # get some more details for one task: task_dict = tasks[0].as_dict() @@ -132,7 +131,7 @@ # always clean up the session, no matter if we caught an exception or # not. This will kill all remaining pilots. 
report.header('finalize') - session.close() + session.close(download=False) report.header() diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index f388ed2cc7..983ee631bc 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -61,12 +61,6 @@ def __init__(self, cfg, session): prof = ru.Profiler(ns='radical.pilot', name='agent.0') prof.prof('hostname', uid=cfg.pid, msg=ru.get_hostname()) - # connect to MongoDB for state push/pull - self._connect_db() - - # connect to client communication channels, maybe - self._connect_communication() - # configure ResourceManager before component startup, as components need # ResourceManager information for function (scheduler, executor) self._configure_rm() @@ -82,6 +76,12 @@ def __init__(self, cfg, session): self._cmgr.start_bridges() self._cmgr.start_components() + # connect to client communication channels, maybe + self._connect_client() + + # connect to MongoDB for state push/pull + self._connect_db() + # create the sub-agent configs and start the sub agents self._write_sa_configs() self._start_sub_agents() # TODO: move to cmgr? @@ -147,46 +147,103 @@ def _connect_db(self): # -------------------------------------------------------------------------- # - def _connect_communication(self): + def _connect_client(self): # when running on the same host as the client, we may be able to bypass # MongoDB and instead connect to the client's ZMQ communication # channels. # - cfg = self._cfg.pilot_comm + cfg = self._cfg.client_comm + + # connect to the client's scheduler pubsub (to get tasks) + self._client_input = ru.zmq.Subscriber( + channel='agent_staging_input_pubsub', + url=cfg.agent_staging_input_pubsub.sub, + cb=self._client_input_cb, + log=self._log, + prof=self._prof) + self._client_input.subscribe(self._pid) + + # completed tasks are fed back to the tmgr staging output queue + self._log.debug('=== reg output: %s', cfg.tmgr_staging_output_queue.put) + self._client_output = ru.zmq.Putter(rpc.TMGR_STAGING_OUTPUT_QUEUE, + url=cfg.tmgr_staging_output_queue.put) + self._log.debug('=== reg output: ok') + + # and control pubsub (to register) + self._client_ctrl = ru.zmq.Publisher(channel='control_pubsub', + url=cfg.control_pubsub.sub, + log=self._log, + prof=self._prof) + + # and listen for completed tasks to foward to client + self.register_input(rps.TMGR_STAGING_OUTPUT_PENDING, + rpc.AGENT_COLLECTING_QUEUE, + self._agent_collect_cb) + + # allow control pubsub to connect + time.sleep(1) + + # how do we verify that the comm channel is up? + self._client_ctrl.put(rpc.CONTROL_PUBSUB, + msg={'cmd': 'pilot_register', + 'arg': {'pid': self._pid}}) - ru.write_json('pmgr_comm_pubsub.cfg', cfg.comm) - self.register_subscriber('pmgr_comm_pubsub', self._pilot_comm_cb) - self.register_publisher('pmgr_comm_pubsub') - self._client_input = ru.zmq.Getter(cfg.input['channel'], - cfg.input['get'], - self._client_input_cb) + # -------------------------------------------------------------------------- + # + def _client_input_cb(self, msg): - self._client_output = ru.zmq.Putter(cfg.output['channel'], - cfg.output['get']) + self._log.debug('=== input cb: %s', msg) - # allo comm pubsub to connect - time.sleep(1) + for task in msg: - # how do we verify that the comm channel is up? 
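        # one way to do that (a sketch only, nothing here implements it)
        # would be a ping/pong handshake over the same pubsub, with assumed
        # message names 'ping' and 'pong':
        #
        #   self.publish('pmgr_comm_pubsub', msg={'cmd': 'ping',
        #                                         'arg': {'pid': self._pid}})
        #   # ... then wait on the subscriber for a matching 'pong' reply,
        #   # giving up after a few heartbeat intervals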
- self.publish('pmgr_comm_pubsub', msg={'cmd': 'pilot_connect', - 'arg': {'pid' : self._pid, - 'input' : cfg.input, - 'output': cfg.output}}) + # make sure the tasks obtain env settings (if needed) + if 'task_environment' in self._cfg: + if not task['description'].get('environment'): + task['description']['environment'] = dict() + for k,v in self._cfg['task_environment'].items(): + task['description']['environment'][k] = v + + # we need to make sure to have the correct state: + # task['state'] = rps._task_state_collapse(task['states']) + # self._prof.prof('get', uid=task['uid']) + + # FIXME: raise or fail task! + if task['state'] != rps.AGENT_STAGING_INPUT_PENDING: + self._log.error('=== invalid state: %s:%s:%s', task['uid'], + task['state'], task.get('states')) + + task['control'] = 'agent' + + # now we really own the CUs, and can start working on them (ie. push + # them into the pipeline). We don't publish nor profile as advance, + # since that happened already on the module side when the state was set. + self.advance(msg, publish=False, push=True) # -------------------------------------------------------------------------- # - def _client_input_cb(self, msg): + def _agent_collect_cb(self, msg): + + self._log.debug('=== collect cb: %s', msg) + + if self._client_output: + self._log.debug('=== to client: %s', msg) + self._client_output.put(msg) + + else: + self._log.debug('=== to MongoDB: %s', msg) + for task in msg: + task['$all'] = True + task['control'] = 'tmgr_pending' + self.advance(msg, publish=True, push=False) - self._log.debug('=== input cb: %s %s', msg) - self._client_output.put(msg) # -------------------------------------------------------------------------- # - def _pilot_comm_cb(self, topic, msg): + def _client_ctrl_cb(self, topic, msg): self._log.debug('=== ctl sub cb: %s %s', topic, msg) @@ -252,10 +309,10 @@ def initialize(self): self.register_timed_cb(self._agent_command_cb, timer=self._cfg['db_poll_sleeptime']) - # register idle callback to pull for tasks - self._ingest = mt.Thread(target=self._ingest) - self._ingest.daemon = True - self._ingest.start() + # start ingest thread to pull in tasks + self._ingest_thread = mt.Thread(target=self._ingest) + self._ingest_thread.daemon = True + self._ingest_thread.start() # sub-agents are started, components are started, bridges are up: we are @@ -401,7 +458,8 @@ def _start_sub_agents(self): agent_lm = None for sa in self._cfg['agents']: - target = self._cfg['agents'][sa]['target'] + target = self._cfg['agents'][sa]['target'] + cmdline = None if target == 'local': @@ -512,6 +570,7 @@ def run(self): # ------------------------------------------------------------------ # spawn the sub-agent + assert(cmdline) self._log.info ('create sub-agent %s: %s' % (sa, cmdline)) _SA(sa, cmdline, log=self._log) @@ -680,6 +739,7 @@ def _check_control(self, _, msg): rpc_res = {'uid': arg['uid']} try: print(arg) + ret = None if req == 'hello' : ret = 'hello %s' % ' '.join(arg['arg']) @@ -746,7 +806,7 @@ def _check_tasks_cb(self): 'pilot' : self._pid, 'control' : 'agent_pending'}) if not task_cursor.count(): - self._log.info('tasks pulled: 0') + # self._log.info('tasks pulled: 0') time.sleep(self._cfg['db_poll_sleeptime']) return diff --git a/src/radical/pilot/agent/staging_output/default.py b/src/radical/pilot/agent/staging_output/default.py index 6a114f8a93..905e58d5cb 100644 --- a/src/radical/pilot/agent/staging_output/default.py +++ b/src/radical/pilot/agent/staging_output/default.py @@ -50,7 +50,9 @@ def initialize(self): 
rpc.AGENT_STAGING_OUTPUT_QUEUE, self.work) # we don't need an output queue -- tasks are picked up via mongodb - self.register_output(rps.TMGR_STAGING_OUTPUT_PENDING, None) # drop + self.register_output(rps.TMGR_STAGING_OUTPUT_PENDING, + rpc.AGENT_COLLECTING_QUEUE) + # -------------------------------------------------------------------------- @@ -73,12 +75,6 @@ def work(self, tasks): uid = task['uid'] - # From here on, any state update will hand control over to the tmgr - # again. The next task update should thus push *all* task details, - # not only state. - task['$all'] = True - task['control'] = 'tmgr_pending' - # we always dig for stdout/stderr self._handle_task_stdio(task) diff --git a/src/radical/pilot/configs/agent_default.json b/src/radical/pilot/configs/agent_default.json index f7dc71d7fc..cefc4d6b12 100644 --- a/src/radical/pilot/configs/agent_default.json +++ b/src/radical/pilot/configs/agent_default.json @@ -35,43 +35,20 @@ # stall_hwm and batch_size is 1 (no stalling, no bulking). # "bridges" : { - "agent_staging_input_queue" : { "kind" : "queue", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 0}, - "agent_scheduling_queue" : { "kind" : "queue", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 0}, - "agent_executing_queue" : { "kind" : "queue", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 0}, - "agent_staging_output_queue" : { "kind" : "queue", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 0}, + "agent_staging_input_queue" : {"kind": "queue"}, + "agent_scheduling_queue" : {"kind": "queue"}, + "agent_executing_queue" : {"kind": "queue"}, + "agent_staging_output_queue" : {"kind": "queue"}, - "funcs_req_queue" : { "kind" : "queue", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 1}, - "funcs_res_queue" : { "kind" : "queue", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 1}, + "funcs_req_queue" : {"kind": "queue"}, + "funcs_res_queue" : {"kind": "queue"}, - "agent_unschedule_pubsub" : { "kind" : "pubsub", - "log_level" : "error"}, - "agent_schedule_pubsub" : { "kind" : "pubsub", - "log_level" : "error"}, + "agent_unschedule_pubsub" : {"kind": "pubsub"}, + "agent_schedule_pubsub" : {"kind": "pubsub"}, - "control_pubsub" : { "kind" : "pubsub", - "log_level" : "error"}, - "state_pubsub" : { "kind" : "pubsub", - "log_level" : "error"} - # "log_pubsub" : { "kind" : "pubsub", - # "log_level" : "error"} + "control_pubsub" : {"kind": "pubsub"}, + "state_pubsub" : {"kind": "pubsub"} + # "log_pubsub" : {"kind": "pubsub"} }, "components" : { diff --git a/src/radical/pilot/configs/pmgr_default.json b/src/radical/pilot/configs/pmgr_default.json index c2430a1cb4..ea37d3054d 100644 --- a/src/radical/pilot/configs/pmgr_default.json +++ b/src/radical/pilot/configs/pmgr_default.json @@ -12,20 +12,12 @@ }, "bridges" : { - "pmgr_launching_queue" : {"kind" : "queue", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 0}, - - "pmgr_comm_pubsub" : {"kind" : "pubsub", - "log_level" : "debug", - "stall_hwm" : 0, - "bulk_size" : 0} + "pmgr_launching_queue" : {"kind": "queue"} }, "components" : { # how many instances of the respective components should be started - "pmgr_launching" : {"count" : 1} + "pmgr_launching" : {"count": 1} } } diff --git a/src/radical/pilot/configs/session_default.json b/src/radical/pilot/configs/session_default.json index f590c55d47..b35bc5039e 100644 --- a/src/radical/pilot/configs/session_default.json +++ b/src/radical/pilot/configs/session_default.json @@ -17,33 +17,18 @@ }, "bridges" : { - 
"log_pubsub" : {"kind" : "pubsub", - "log_level" : "error", - "stall_hwm" : 1, - "bulk_size" : 1024}, - "state_pubsub" : {"kind" : "pubsub", - "log_level" : "error", - "stall_hwm" : 1, - "bulk_size" : 1024}, - "control_pubsub" : {"kind" : "pubsub", - "log_level" : "error", - "stall_hwm" : 1, - "bulk_size" : 1024}, + "log_pubsub" : {"kind": "pubsub"}, + "state_pubsub" : {"kind": "pubsub"}, + "control_pubsub" : {"kind": "pubsub"}, - "stager_request_queue" : {"kind" : "queue", - "log_level" : "error", - "stall_hwm" : 1, - "bulk_size" : 0}, - "stager_response_pubsub": {"kind" : "pubsub", - "log_level" : "debug", - "stall_hwm" : 1, - "bulk_size" : 0} + "stager_request_queue" : {"kind": "queue" }, + "stager_response_pubsub": {"kind": "pubsub"} }, "components" : { # how many instances of the respective components should be started - "update" : { "count" : 1 }, - "stager" : { "count" : 1 } + "update" : {"count": 1}, + "stager" : {"count": 1} } } diff --git a/src/radical/pilot/configs/tmgr_default.json b/src/radical/pilot/configs/tmgr_default.json index c152c8ca5b..c0c245eb05 100644 --- a/src/radical/pilot/configs/tmgr_default.json +++ b/src/radical/pilot/configs/tmgr_default.json @@ -15,27 +15,13 @@ }, "bridges" : { - "tmgr_staging_input_queue" : {"kind" : "queue", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 0}, - "tmgr_scheduling_queue" : {"kind" : "queue", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 0}, + "tmgr_staging_input_queue" : {"kind": "queue" }, + "tmgr_scheduling_queue" : {"kind": "queue" }, + "agent_staging_input_pubsub": {"kind": "pubsub"}, + "tmgr_staging_output_queue" : {"kind": "queue" }, - "tmgr_staging_output_queue" : {"kind" : "queue", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 0}, - "tmgr_unschedule_pubsub" : {"kind" : "pubsub", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 0}, - "tmgr_reschedule_pubsub" : {"kind" : "pubsub", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 0} + "tmgr_unschedule_pubsub" : {"kind": "pubsub"}, + "tmgr_reschedule_pubsub" : {"kind": "pubsub"} }, "components" : { diff --git a/src/radical/pilot/constants.py b/src/radical/pilot/constants.py index ac1b71a27b..ca54678b84 100644 --- a/src/radical/pilot/constants.py +++ b/src/radical/pilot/constants.py @@ -23,10 +23,12 @@ TMGR_STAGING_INPUT_COMPONENT = 'tmgr_staging_input' TMGR_STAGING_OUTPUT_COMPONENT = 'tmgr_staging_output' +AGENT_STAGING_INPUT_PUBSUB = 'agent_staging_input_pubsub' AGENT_STAGING_INPUT_QUEUE = 'agent_staging_input_queue' AGENT_SCHEDULING_QUEUE = 'agent_scheduling_queue' AGENT_EXECUTING_QUEUE = 'agent_executing_queue' AGENT_STAGING_OUTPUT_QUEUE = 'agent_staging_output_queue' +AGENT_COLLECTING_QUEUE = 'agent_collecting_queue' AGENT_STAGING_INPUT_COMPONENT = 'agent_staging_input' AGENT_SCHEDULING_COMPONENT = 'agent_scheduling' diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py index 04db0acc10..747fb71078 100644 --- a/src/radical/pilot/pilot_manager.py +++ b/src/radical/pilot/pilot_manager.py @@ -140,7 +140,8 @@ def __init__(self, session, cfg='default'): self._stager_queue = self.get_output_ep(rpc.STAGER_REQUEST_QUEUE) # we also listen for completed staging directives - self.register_subscriber(rpc.STAGER_RESPONSE_PUBSUB, self._staging_ack_cb) + self.register_subscriber(rpc.STAGER_RESPONSE_PUBSUB, + self._staging_ack_cb) self._active_sds = dict() self._sds_lock = ru.Lock('pmgr_sds_lock') diff --git a/src/radical/pilot/pmgr/launching/default.py 
b/src/radical/pilot/pmgr/launching/default.py
index 478381d18c..ee10c5a42a 100644
--- a/src/radical/pilot/pmgr/launching/default.py
+++ b/src/radical/pilot/pmgr/launching/default.py
@@ -91,9 +91,6 @@ def initialize(self):
         self._rp_version, _, _, _, self._rp_sdist_name, self._rp_sdist_path = \
                 ru.get_version([self._mod_dir, self._root_dir])

-        # create our own bridges to communicate with the pilots
-        self._bridges = list()
-

     # --------------------------------------------------------------------------
     #
@@ -122,13 +119,6 @@ def finalize(self):
             self._log.exception('finalization error')


-    # --------------------------------------------------------------------------
-    #
-    def _pmgr_comm_cb(self, topic, msg):
-
-        self._log.debug('comm msg: %s', msg)
-
-
     # --------------------------------------------------------------------------
     #
     def _control_cb(self, topic, msg):
@@ -208,7 +198,8 @@ def _pilot_watcher_cb(self):
         for pid in self._checking:
             state = self._pilots[pid]['job'].state
-            self._log.debug('saga job state: %s %s %s', pid, self._pilots[pid]['job'], state)
+            self._log.debug('saga job state: %s %s %s', pid,
+                            self._pilots[pid]['job'], state)

             if state in [rs.job.DONE, rs.job.FAILED, rs.job.CANCELED]:
                 pilot = self._pilots[pid]['pilot']
@@ -990,48 +981,17 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name):
         agent_cfg['resource_cfg'] = copy.deepcopy(rcfg)
         agent_cfg['debug'] = self._log.getEffectiveLevel()

+        # pilot needs to know where to fetch tasks from and where to
+        # send tasks back to
+        agent_cfg['client_comm'] = {
+            'agent_staging_input_pubsub':
+                ru.read_json('%s/agent_staging_input_pubsub.cfg' % self._cfg.path),
+            'tmgr_staging_output_queue':
+                ru.read_json('%s/tmgr_staging_output_queue.cfg' % self._cfg.path),
+            'control_pubsub':
+                ru.read_json('%s/control_pubsub.cfg' % self._cfg.path),
+        }

-        # the pilot also gets contact points for some client side
-        # communication channels.  If the pilot happens to be able to
-        # connect to them, they will get used for communication
-        # - otherwise we fall back to MongoDB.
-        #
-        # We will need a separate queue for each pilot from which the
-        # pilot can pull tasks, so that queue cannot be defined in
-        # a static config file.  Instead we start it here and own that
-        # queue for as long as the agent lives (in practice, for as long
-        # as this pmgr lives). 
-
-            bcfg = ru.Config(cfg={'channel'   : '%s.in' % pid,
-                                  'type'      : 'queue',
-                                  'stall_hwm' : 1,
-                                  'bulk_size' : 0,
-                                  'path'      : self._cfg.path})
-            b_in = ru.zmq.Queue(bcfg)
-            b_in.start()
-            self._bridges.append(b_in)
-
-            bcfg = ru.Config(cfg={'channel'   : '%s.out' % pid,
-                                  'type'      : 'queue',
-                                  'stall_hwm' : 1,
-                                  'bulk_size' : 0,
-                                  'path'      : self._cfg.path})
-            b_out = ru.zmq.Queue(bcfg)
-            b_out.start()
-            self._bridges.append(b_out)
-
-            self.register_subscriber('pmgr_comm_pubsub', self._pmgr_comm_cb)
-
-            comm_cfg = ru.read_json('%s/pmgr_comm_pubsub.cfg' % self._cfg.path)
-
-            agent_cfg['pilot_comm'] = {
-                'input'  : {'channel': b_in.channel,
-                            'put'    : str(b_in.addr_put),
-                            'get'    : str(b_in.addr_get)},
-                'output' : {'channel': b_out.channel,
-                            'put'    : str(b_out.addr_put),
-                            'get'    : str(b_out.addr_get)},
-                'comm'   : comm_cfg}

         # we'll also push the agent config into MongoDB
         pilot['cfg'] = agent_cfg
diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py
index 2218ec9d10..c8b6367215 100644
--- a/src/radical/pilot/tmgr/staging_input/default.py
+++ b/src/radical/pilot/tmgr/staging_input/default.py
@@ -58,16 +58,16 @@ def initialize(self):
         self._js_cache    = dict()
         self._pilots      = dict()
         self._pilots_lock = ru.RLock()
+        self._connected   = list()   # list of pilots connected by ZMQ

         self.register_input(rps.TMGR_STAGING_INPUT_PENDING,
                             rpc.TMGR_STAGING_INPUT_QUEUE, self.work)

+        self.register_publisher(rpc.AGENT_STAGING_INPUT_PUBSUB)
+
         # this queue is inaccessible, needs routing via mongodb
         self.register_output(rps.AGENT_STAGING_INPUT_PENDING, None)

-        # alternatively, we keep a registry of pilot specific queues to route
-        self._pilot_queues = dict()   # pid : zmq.Putter
-
         # we subscribe to the command channel to learn about pilots being added
         # to this task manager.
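        # a control message of that kind is expected to look like, e.g.:
        #
        #     {'cmd': 'add_pilots',
        #      'arg': {'pilots': [...]}}
        #
        # (see `_control_cb` below; the pilot list entries are pilot dicts)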
self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb) @@ -93,32 +93,52 @@ def _control_cb(self, topic, msg): if cmd == 'add_pilots': - pilots = arg.get('pilots', []) - - if not isinstance(pilots, list): - pilots = [pilots] + pilots = arg['pilots'] with self._pilots_lock: + for pilot in pilots: pid = pilot['uid'] self._log.debug('add pilot %s', pid) + if pid not in self._pilots: self._pilots[pid] = pilot elif cmd == 'pilot_connect': pid = arg['pid'] - cfg = arg['cfg'] - - self._pilot_queues[pid] = ru.zmq.Putter(cfg['input']['channel'], - cfg['input']['put']) + self._log.debug('register pilot %s', pid) - else: - self._log.debug('skip cmd %s', cmd) + if pid not in self._connected: + self._connected.append(pid) return True + # -------------------------------------------------------------------------- + # + def _advance_tasks(self, tasks, pid, state=None): + + if not state: + state = rps.AGENT_STAGING_INPUT_PENDING + + if pid not in self._connected: + + for task in tasks: + # pass control via MongoDB + task['$all'] = True + task['control'] = 'agent_pending' + self._log.debug('=== send to mdb: %d', len(tasks)) + + # perform and publish state update + self.advance(tasks, state, publish=True, push=False) + + # publish to the agent_staging_input_pubsub + self._log.debug('=== send to pq: %d', len(tasks)) + self.publish(rpc.AGENT_STAGING_INPUT_PUBSUB, msg=tasks, + topic=pid) + + # -------------------------------------------------------------------------- # def work(self, tasks): @@ -138,12 +158,6 @@ def work(self, tasks): if pid not in staging_tasks : staging_tasks[pid] = list() if pid not in no_staging_tasks: no_staging_tasks[pid] = list() - # no matter if we perform any staging or not, we will push the full - # task info to the DB on the next advance, and will pass control to - # the agent. - task['$all'] = True - task['control'] = 'agent_pending' - # check if we have any staging directives to be enacted in this # component actionables = list() @@ -280,16 +294,20 @@ def work(self, tasks): for pid in no_staging_tasks: - self.advance(no_staging_tasks[pid], rps.AGENT_STAGING_INPUT_PENDING, - publish=True, push=False) - - if pid in self._pilot_queues: - self._pilot_queues[pid].put(no_staging_tasks[pid]) + self._advance_tasks(no_staging_tasks[pid], pid) for pid in staging_tasks: for task,actionables in staging_tasks[pid]: - self._handle_task(task, actionables) + try: + self._handle_task(task, actionables) + self._advance_tasks([task], pid=task['pilot']) + + except Exception as e: + # FIXME: serialize exception + task['exception'] = str(e) + self._advance_tasks([task], pid=task['pilot'], + state=rps.FAILED) # -------------------------------------------------------------------------- @@ -423,14 +441,5 @@ def _handle_task(self, task, actionables): os.remove(tar_path) - # staging is done, we can advance the task at last - self.advance(task, rps.AGENT_STAGING_INPUT_PENDING, - publish=True, push=True) - - pid = task['pilot'] - if pid in self._pilot_queues: - self._pilot_queues[pid].put([task]) - - # ------------------------------------------------------------------------------ diff --git a/src/radical/pilot/tmgr/staging_output/default.py b/src/radical/pilot/tmgr/staging_output/default.py index 4337eda6f5..c66df5ee64 100644 --- a/src/radical/pilot/tmgr/staging_output/default.py +++ b/src/radical/pilot/tmgr/staging_output/default.py @@ -5,8 +5,7 @@ import os -import radical.saga as rs -import radical.utils as ru +import radical.saga as rs from ... import states as rps from ... 
import constants as rpc @@ -43,15 +42,8 @@ def initialize(self): self.register_input(rps.TMGR_STAGING_OUTPUT_PENDING, rpc.TMGR_STAGING_OUTPUT_QUEUE, self.work) - # we also listen on individual pilot queues - self._pilot_queues = dict() # pid : zmq.Getter - - # we subscribe to the command channel to learn about pilots being added - # to this task manager. - self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb) - # we don't need an output queue -- tasks will be final - # + # -------------------------------------------------------------------------- # @@ -61,32 +53,13 @@ def finalize(self): self._cache[key].close() - # -------------------------------------------------------------------------- - # - def _control_cb(self, topic, msg): - - cmd = msg.get('cmd') - arg = msg.get('arg') - - if cmd == 'pilot_connect': - - pid = arg['pid'] - cfg = arg['cfg'] - - self._pilot_queues[pid] = ru.zmq.Getter(cfg['output']['channel'], - cfg['output']['get'], - self.work) - - else: - self._log.debug('skip cmd %s', cmd) - - return True - - # -------------------------------------------------------------------------- # def work(self, tasks): + if not isinstance(tasks, list): + tasks = [tasks] + self.advance(tasks, rps.TMGR_STAGING_OUTPUT, publish=True, push=False) # we first filter out any tasks which don't need any output staging, and diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index de6d9ef924..60b79ff4f0 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -150,6 +150,8 @@ def start_bridges(self, cfg=None): check if any bridges are defined under `cfg['bridges']` and start them ''' + self._log.debug('=== starting bridges (%s)', cfg) + self._prof.prof('start_bridges_start', uid=self._uid) timeout = self._cfg.heartbeat.timeout @@ -1274,7 +1276,7 @@ def advance(self, things, state=None, publish=True, push=False, ts=None, # -------------------------------------------------------------------------- # - def publish(self, pubsub, msg): + def publish(self, pubsub, msg, topic=None): ''' push information into a publication channel ''' @@ -1282,7 +1284,10 @@ def publish(self, pubsub, msg): if not self._publishers.get(pubsub): raise RuntimeError("no msg route for '%s': %s" % (pubsub, msg)) - self._publishers[pubsub].put(pubsub, msg) + if not topic: + topic = pubsub + + self._publishers[pubsub].put(topic, msg) # ------------------------------------------------------------------------------ From 28d5ea3a8f40fc6998854c4dacb63e08fb7465ed Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Tue, 9 Mar 2021 17:56:04 +0100 Subject: [PATCH 011/171] snap --- bin/radical-pilot-agent-funcs | 4 +- examples/01_task_details.py | 12 +- src/radical/pilot/agent/agent_0.py | 103 ++++++++++++------ src/radical/pilot/agent/executing/funcs.py | 2 +- src/radical/pilot/configs/agent_default.json | 2 +- src/radical/pilot/pilot_manager.py | 45 ++++++-- src/radical/pilot/task_manager.py | 2 +- .../pilot/tmgr/staging_input/default.py | 23 ++-- src/radical/pilot/utils/component.py | 5 +- src/radical/pilot/utils/prof_utils.py | 11 +- 10 files changed, 141 insertions(+), 68 deletions(-) diff --git a/bin/radical-pilot-agent-funcs b/bin/radical-pilot-agent-funcs index d549567e72..73983b142a 100755 --- a/bin/radical-pilot-agent-funcs +++ b/bin/radical-pilot-agent-funcs @@ -135,7 +135,7 @@ class Executor(object): while True: - # msgs = self._zmq_ctl.get_nowait(100) + # msgs = self._zmq_ctl.get_nowait(timeout=100) msgs = None time.sleep(1) @@ -173,7 +173,7 @@ class 
Executor(object): while not self._term.is_set(): - tasks = self._zmq_req.get_nowait(1000) + tasks = self._zmq_req.get_nowait(timeout=1000) if tasks: diff --git a/examples/01_task_details.py b/examples/01_task_details.py index 7b88bcdc22..c57a02eff4 100755 --- a/examples/01_task_details.py +++ b/examples/01_task_details.py @@ -63,13 +63,14 @@ 'project' : config[resource].get('project', None), 'queue' : config[resource].get('queue', None), 'access_schema' : config[resource].get('schema', None), - 'cores' : config[resource].get('cores', 1), + 'cores' : 1024 * 2, 'gpus' : config[resource].get('gpus', 0), } pdesc = rp.PilotDescription(pd_init) # Launch the pilot. pilot = pmgr.submit_pilots(pdesc) + # pmgr.wait_pilots(uids=pilot.uid, state=rp.PMGR_ACTIVE) report.header('submit tasks') # Register the pilot in a TaskManager object. @@ -77,10 +78,11 @@ # Create a workload of tasks. # Each task runs '/bin/date'. - n = 1024 # number of tasks to run - report.info('create %d task description(s)\n\t' % n) + n = 64 * 1024 # number of tasks to run + report.info('create %d task description(s)\n' % n) tds = list() + report.progress_tgt(n, label='create') for i in range(0, n): # create a new task description, and fill it. @@ -90,7 +92,7 @@ tds.append(td) report.progress() - report.ok('>>ok\n') + report.progress_done() # Submit the previously created task descriptions to the # PilotManager. This will trigger the selected scheduler to start @@ -131,7 +133,7 @@ # always clean up the session, no matter if we caught an exception or # not. This will kill all remaining pilots. report.header('finalize') - session.close(download=False) + session.close(download=True) report.header() diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 983ee631bc..89268fc749 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -159,42 +159,75 @@ def _connect_client(self): self._client_input = ru.zmq.Subscriber( channel='agent_staging_input_pubsub', url=cfg.agent_staging_input_pubsub.sub, + topic=self._pid, cb=self._client_input_cb, log=self._log, prof=self._prof) self._client_input.subscribe(self._pid) # completed tasks are fed back to the tmgr staging output queue - self._log.debug('=== reg output: %s', cfg.tmgr_staging_output_queue.put) self._client_output = ru.zmq.Putter(rpc.TMGR_STAGING_OUTPUT_QUEUE, url=cfg.tmgr_staging_output_queue.put) - self._log.debug('=== reg output: ok') - - # and control pubsub (to register) - self._client_ctrl = ru.zmq.Publisher(channel='control_pubsub', - url=cfg.control_pubsub.sub, - log=self._log, - prof=self._prof) # and listen for completed tasks to foward to client self.register_input(rps.TMGR_STAGING_OUTPUT_PENDING, rpc.AGENT_COLLECTING_QUEUE, self._agent_collect_cb) + # and control pubsub (to register) + self._client_ctrl_pub = ru.zmq.Publisher(channel=rpc.CONTROL_PUBSUB, + url=cfg.control_pubsub.pub, + log=self._log, + prof=self._prof) + + # and control pubsub (to register) + self._client_ctrl_sub = ru.zmq.Subscriber(channel=rpc.CONTROL_PUBSUB, + url=cfg.control_pubsub.sub, + log=self._log, + prof=self._prof, + topic=rpc.CONTROL_PUBSUB) + # allow control pubsub to connect time.sleep(1) - # how do we verify that the comm channel is up? 
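+        # (answered below: we publish 'pilot_register' and wait for a
+        #  matching 'pilot_register_ok' on the control channel before
+        #  trusting the ZMQ route)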
- self._client_ctrl.put(rpc.CONTROL_PUBSUB, + # channels are set up, register client + self._client_ctrl_pub.put(rpc.CONTROL_PUBSUB, msg={'cmd': 'pilot_register', 'arg': {'pid': self._pid}}) + self._log.debug('pilot registered') + + # and wait for at most 30 seconds for acknoweldgement + start = time.time() + self._registered = False + while True: + + tout = 30 - (time.time() - start) + if tout < 0: + break + + topic, msg = self._client_ctrl_sub.get_nowait(timeout=tout) + if not msg: + continue + + cmd = msg['cmd'] + if cmd != 'pilot_register_ok': + continue + + pid = msg['arg']['pid'] + if pid == self._pid: + self._registered = True + break + + self._log.debug('registered: %s', self._registered) + + # -------------------------------------------------------------------------- # - def _client_input_cb(self, msg): + def _client_input_cb(self, topic, msg): - self._log.debug('=== input cb: %s', msg) + self._log.debug('=== input cb %s: %s', topic, len(msg)) for task in msg: @@ -211,7 +244,7 @@ def _client_input_cb(self, msg): # FIXME: raise or fail task! if task['state'] != rps.AGENT_STAGING_INPUT_PENDING: - self._log.error('=== invalid state: %s:%s:%s', task['uid'], + self._log.error('invalid state: %s:%s:%s', task['uid'], task['state'], task.get('states')) task['control'] = 'agent' @@ -226,14 +259,14 @@ def _client_input_cb(self, msg): # def _agent_collect_cb(self, msg): - self._log.debug('=== collect cb: %s', msg) + self._log.debug('=== collect cb: %s', len(msg)) if self._client_output: - self._log.debug('=== to client: %s', msg) + self._log.debug('=== to client: %s', len(msg)) self._client_output.put(msg) else: - self._log.debug('=== to MongoDB: %s', msg) + self._log.debug('=== to MongoDB: %s', len(msg)) for task in msg: task['$all'] = True task['control'] = 'tmgr_pending' @@ -309,24 +342,32 @@ def initialize(self): self.register_timed_cb(self._agent_command_cb, timer=self._cfg['db_poll_sleeptime']) - # start ingest thread to pull in tasks - self._ingest_thread = mt.Thread(target=self._ingest) - self._ingest_thread.daemon = True - self._ingest_thread.start() - + # # start ingest thread to pull in tasks + # self._ingest_thread = mt.Thread(target=self._ingest) + # self._ingest_thread.daemon = True + # self._ingest_thread.start() # sub-agents are started, components are started, bridges are up: we are - # ready to roll! Update pilot state. - pilot = {'type' : 'pilot', - 'uid' : self._pid, - 'state' : rps.PMGR_ACTIVE, - 'resource_details' : { - 'lm_info' : self._rm.lm_info.get('version_info'), - 'lm_detail' : self._rm.lm_info.get('lm_detail'), - 'rm_info' : self._rm.rm_info}, - '$set' : ['resource_details']} + # ready to roll! 
Send state update via client control pubsub + rm_info = self._rm.rm_info + n_nodes = len(rm_info['node_list']) + pilot = { + 'type' : 'pilot', + 'uid' : self._pid, + 'state' : rps.PMGR_ACTIVE, + 'resources': {'rm_info': rm_info, + 'cpu' : rm_info['cores_per_node'] * n_nodes, + 'gpu' : rm_info['gpus_per_node'] * n_nodes}} + + self._client_ctrl_pub.put(topic=rpc.CONTROL_PUBSUB, + msg={'cmd': 'pilot_activate', + 'arg': {'pilot': pilot}}) + + # also update via MongoDB as fallback self.advance(pilot, publish=True, push=False) + self._log.debug('=== sent pilot_activate') + # -------------------------------------------------------------------------- # @@ -819,7 +860,7 @@ def _check_tasks_cb(self): {'$set' : {'control' : 'agent'}}, multi=True) - self._log.info("tasks pulled: %4d", len(task_list)) + self._log.info("=== tasks pulled from db: %4d", len(task_list)) self._prof.prof('get', msg='bulk: %d' % len(task_list), uid=self._pid) for task in task_list: diff --git a/src/radical/pilot/agent/executing/funcs.py b/src/radical/pilot/agent/executing/funcs.py index 1113b61148..7156597403 100644 --- a/src/radical/pilot/agent/executing/funcs.py +++ b/src/radical/pilot/agent/executing/funcs.py @@ -208,7 +208,7 @@ def _collect(self): while not self._terminate.is_set(): # pull tasks from "funcs_out_queue" - tasks = self._res_queue.get_nowait(1000) + tasks = self._res_queue.get_nowait(timeout=1000) if tasks: diff --git a/src/radical/pilot/configs/agent_default.json b/src/radical/pilot/configs/agent_default.json index cefc4d6b12..2d9941d693 100644 --- a/src/radical/pilot/configs/agent_default.json +++ b/src/radical/pilot/configs/agent_default.json @@ -54,7 +54,7 @@ "components" : { # the update worker must live in agent.0, since only that agent is # sure to have connectivity toward the DB. 
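    # note: completed tasks are now routed back to the client via ZMQ
    # (see `_agent_collect_cb` in agent_0), so the update worker is
    # disabled here: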
- "update" : {"count" : 1}, + # "update" : {"count" : 1}, "agent_staging_input" : {"count" : 1}, "agent_scheduling" : {"count" : 1}, "agent_executing" : {"count" : 1}, diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py index 747fb71078..bb8455689b 100644 --- a/src/radical/pilot/pilot_manager.py +++ b/src/radical/pilot/pilot_manager.py @@ -7,6 +7,7 @@ import os import time import threading as mt +from radical.pilot.constants import PILOT_STATE import radical.utils as ru @@ -156,6 +157,9 @@ def __init__(self, session, cfg='default'): # also listen to the state pubsub for pilot state changes self.register_subscriber(rpc.STATE_PUBSUB, self._state_sub_cb) + # also listen to the state control for pilot activation + self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_sub_cb) + # let session know we exist self._session._register_pmgr(self) @@ -286,8 +290,7 @@ def _state_pull_cb(self): for pilot_dict in pilot_dicts: self._log.debug('state pulled: %s: %s', pilot_dict['uid'], pilot_dict['state']) - if not self._update_pilot(pilot_dict, publish=True): - return False + self._update_pilot(pilot_dict, publish=True) return True @@ -321,12 +324,32 @@ def _state_sub_cb(self, topic, msg): # we got the state update from the state callback - don't # publish it again - if not self._update_pilot(thing, publish=False): - return False + self._update_pilot(thing, publish=False) return True + # -------------------------------------------------------------------------- + # + def _control_sub_cb(self, topic, msg): + + if self._terminate.is_set(): + return False + + cmd = msg['cmd'] + arg = msg['arg'] + + self._log.debug('=== got control cmd %s: %s', cmd, arg) + + if cmd == 'pilot_activate': + pilot = arg['pilot'] + self._update_pilot(pilot, publish=True) + + # store resource json for RA + fname = '%s/%s.resources.json' % (self._cfg.path, pilot['uid']) + ru.write_json(fname, pilot['resources']) + + # -------------------------------------------------------------------------- # def _update_pilot(self, pilot_dict, publish=False, advance=True): @@ -340,25 +363,25 @@ def _update_pilot(self, pilot_dict, publish=False, advance=True): # we don't care about pilots we don't know if pid not in self._pilots: - return True # this is not an error + return # this is not an error # only update on state changes current = self._pilots[pid].state target = pilot_dict['state'] if current == target: - return True + return target, passed = rps._pilot_state_progress(pid, current, target) - # print '%s current: %s' % (pid, current) - # print '%s target : %s' % (pid, target ) - # print '%s passed : %s' % (pid, passed ) + self._log.debug('=== %s current: %s', pid, current) + self._log.debug('=== %s target : %s', pid, target ) + self._log.debug('=== %s passed : %s', pid, passed ) if target in [rps.CANCELED, rps.FAILED]: # don't replay intermediate states passed = passed[-1:] for s in passed: - # print '%s advance: %s' % (pid, s ) + self._log.debug('=== %s advance: %s', pid, s ) # we got state from either pubsub or DB, so don't publish again. # we also don't need to maintain bulks for that reason. 
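            # (for example, when a pilot reports PMGR_ACTIVE while we still
            #  see it as PMGR_LAUNCHING, `passed` also contains the skipped
            #  intermediate states, and each one is replayed here in order)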
pilot_dict['state'] = s @@ -372,8 +395,6 @@ def _update_pilot(self, pilot_dict, publish=False, advance=True): pilot_dict.get('lm_info'), pilot_dict.get('lm_detail')) - return True - # -------------------------------------------------------------------------- # diff --git a/src/radical/pilot/task_manager.py b/src/radical/pilot/task_manager.py index 5169c18b00..eafcdbc2c5 100644 --- a/src/radical/pilot/task_manager.py +++ b/src/radical/pilot/task_manager.py @@ -877,7 +877,7 @@ def submit_tasks(self, descriptions): # insert tasks into the database, as a bulk. task_docs = [u.as_dict() for u in tasks] - self._session._dbs.insert_tasks(task_docs) + # self._session._dbs.insert_tasks(task_docs) # Only after the insert can we hand the tasks over to the next # components (ie. advance state). diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py index c8b6367215..e1b0308a73 100644 --- a/src/radical/pilot/tmgr/staging_input/default.py +++ b/src/radical/pilot/tmgr/staging_input/default.py @@ -104,14 +104,18 @@ def _control_cb(self, topic, msg): if pid not in self._pilots: self._pilots[pid] = pilot - elif cmd == 'pilot_connect': + elif cmd == 'pilot_register': pid = arg['pid'] - self._log.debug('register pilot %s', pid) + self._log.debug('=== register pilot %s', pid) if pid not in self._connected: self._connected.append(pid) + # let pilot know that tasks will arive via ZMQ + self.publish(rpc.CONTROL_PUBSUB, msg={'cmd': 'pilot_register_ok', + 'arg': {'pid': pid}}) + return True @@ -124,11 +128,16 @@ def _advance_tasks(self, tasks, pid, state=None): if pid not in self._connected: + self._log.debug('=== send to mdb: %d', len(tasks)) + for task in tasks: # pass control via MongoDB task['$all'] = True task['control'] = 'agent_pending' - self._log.debug('=== send to mdb: %d', len(tasks)) + + # insert tasks into the database, as a bulk. + self._session._dbs.insert_tasks(tasks) + # perform and publish state update self.advance(tasks, state, publish=True, push=False) @@ -284,12 +293,12 @@ def work(self, tasks): cmd = "tar xvf %s/%s -C %s" % (session_sbox.path, tar_name, session_sbox.path) - j = js_tmp.run_job(cmd) + j = js_tmp.run_job(cmd) j.wait() self._log.debug('untar : %s', cmd) - self._log.debug('untar : %s\n---\n%s\n---\n%s', - j.get_stdout_string(), j.get_stderr_string(), - j.exit_code) + # self._log.debug('untar : %s\n---\n%s\n---\n%s', + # j.get_stdout_string(), j.get_stderr_string(), + # j.exit_code) for pid in no_staging_tasks: diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index 60b79ff4f0..9fe9cdc92a 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -150,8 +150,7 @@ def start_bridges(self, cfg=None): check if any bridges are defined under `cfg['bridges']` and start them ''' - self._log.debug('=== starting bridges (%s)', cfg) - + self._log.debug('starting bridges (%s)', cfg) self._prof.prof('start_bridges_start', uid=self._uid) timeout = self._cfg.heartbeat.timeout @@ -1062,7 +1061,7 @@ def work_cb(self): # FIXME: a simple, 1-thing caching mechanism would likely # remove the req/res overhead completely (for any # non-trivial worker). 
- things = input.get_nowait(200) # in microseconds + things = input.get_nowait(timeout=200) # microseconds if not things: return True diff --git a/src/radical/pilot/utils/prof_utils.py b/src/radical/pilot/utils/prof_utils.py index ba9f1d7f6f..35cfa4b185 100644 --- a/src/radical/pilot/utils/prof_utils.py +++ b/src/radical/pilot/utils/prof_utils.py @@ -688,11 +688,12 @@ def fix_uids(json): tree[uid]['description'] = dict() for pilot in sorted(json['pilot'], key=lambda k: k['uid']): - uid = pilot['uid'] - pmgr = pilot['pmgr'] - pilot['cfg']['resource_details'] = pilot['resource_details'] - tree[pmgr]['children'].append(uid) - tree[uid] = {'uid' : uid, + pid = pilot['uid'] + pmgr = pilot['pmgr'] + details = ru.read_json('%s/%s.resources.json' % (src, pid)) + pilot['cfg']['resource_details'] = details + tree[pmgr]['children'].append(pid) + tree[pid] = {'uid' : pid, 'etype' : 'pilot', 'cfg' : pilot['cfg'], 'resources' : pilot['resources'], From 080cf12708c5b2b48f35dd5b4cb81d10ff18bc2c Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Tue, 9 Mar 2021 18:00:09 +0100 Subject: [PATCH 012/171] add agent bridge --- bin/radical-pilot-agent-bridge | 367 +++++++++++++++++++ tests/integration_tests/test_agent_bridge.py | 43 +++ 2 files changed, 410 insertions(+) create mode 100755 bin/radical-pilot-agent-bridge create mode 100755 tests/integration_tests/test_agent_bridge.py diff --git a/bin/radical-pilot-agent-bridge b/bin/radical-pilot-agent-bridge new file mode 100755 index 0000000000..955b1befe7 --- /dev/null +++ b/bin/radical-pilot-agent-bridge @@ -0,0 +1,367 @@ +#!/usr/bin/env python3 + +import sys +import zmq +import time +import queue + +import threading as mt +import multiprocessing as mp +import radical.utils as ru + + +_TIMEOUT = 300 # time to keep the bridge alive +_LINGER_TIMEOUT = 250 # ms to linger after close +_HIGH_WATER_MARK = 0 # number of messages to buffer before dropping + # 0: infinite + + +# ------------------------------------------------------------------------------ +# This ZMQ bridge links clients and agents, and bridges network gaps. As such +# it needs to run on a resource which has a public IP address that can be +# reached from both the client and the server machine. +# +# The bridge listens on a `REP` socket (`bridge_request`) for incoming client or +# agent connections, identified by a common session ID. A client connection +# will trigger the creation of the following communication channels: +# +# - control_pubsub_bridge +# links client and agent control pubsubs (includes heartbeat) +# - state_pubsub_bridge +# forwards task state updates from agents to client +# - agent_input_queue +# forwards tasks from the client to the agents +# - agent_output_queue +# forwards tasks from the agents to the client +# +# +# The protocol on the `bridge_request` channel is as follows: +# +# client_register +# --------------- +# +# request: +# 'cmd': 'client_register' +# 'arg': 'sid': +# +# reply: +# 'res': {'control_pubsub' : {'sub': , 'pub': }, +# 'state_pubsub' : {'sub': , 'pub': }, +# 'agent_input_queue' : {'put': , 'get': }, +# 'agent_output_queue': {'put': , 'get': }} +# +# notes: +# - the request will fail if the session ID is known from another +# `client_register` call +# 'err': 'sid already connected' +# - this request should otherwise always succeed +# - the created pubsub channels will be terminated if the control channel +# has not seen a client heartbeat for <10 * heartbeat_interval> seconds +# - see semantics of the 'client_unregister' request for details. 
+#   - the same termination semantics hold for the 'client_unregister'
+#     request.
+#   - any agent queues which exist for that session at the time of
+#     termination will also be closed, disregarding any data held in those
+#     queues.
+#
+#
+# client_lookup
+# ---------------
+#
+#   request:
+#     'cmd': 'client_lookup'
+#     'arg': 'sid': <sid>
+#
+#   reply:
+#     'res': {'control_pubsub'    : {'sub': <url>, 'pub': <url>},
+#             'state_pubsub'      : {'sub': <url>, 'pub': <url>},
+#             'agent_input_queue' : {'put': <url>, 'get': <url>},
+#             'agent_output_queue': {'put': <url>, 'get': <url>}}
+#
+#   notes:
+#   - the request will fail if the session ID is not registered (anymore)
+#   - this request should otherwise always succeed
+#   - the call returns the same information as `client_register`, but does
+#     not alter the state of the client's bridge in any other way.
+#   - the request does not count as a heartbeat
+#
+#
+# client_unregister
+# -----------------
+#
+#   request:
+#     'cmd': 'client_unregister'
+#     'arg': 'sid': <sid>
+#
+#   reply:
+#     'res': 'ok'
+#
+#   - this method only fails when the session is not connected, with
+#     'err': 'session not connected'
+#   - in all other cases, the request will cause the immediate termination of
+#     all ZMQ bridges (pubsubs and queues) previously created for that
+#     session, regardless of their state, disposing of all undelivered
+#     messages still held in the bridges.
+#
+#
+# client_heartbeat
+# ----------------
+#
+#   request:
+#     'cmd': 'client_heartbeat'
+#     'arg': 'sid': <sid>
+#
+#   reply:
+#     'res': {'time': <timestamp>}
+#
+#   notes:
+#   - this request will fail if the session is either not connected or timed
+#     out because of an earlier heartbeat failure:
+#     'err': 'session not connected'
+#   - it will otherwise assure the server that the client is still alive and
+#     requires the bridge to be up.  If the server does not receive a heartbeat
+#     for longer than TIMEOUT seconds, the bridge will be terminated.
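+#
+# example exchange
+# ----------------
+#
+# For illustration, a typical client lifecycle on this channel might look as
+# follows (the session ID and reply values are made up):
+#
+#   -> {'cmd': 'client_register',   'arg': {'sid': 'rp.session.0000'}}
+#   <- {'res': {'control_pubsub'    : {'pub': <url>, 'sub': <url>},
+#               'state_pubsub'      : {'pub': <url>, 'sub': <url>},
+#               'agent_input_queue' : {'put': <url>, 'get': <url>},
+#               'agent_output_queue': {'put': <url>, 'get': <url>}}}
+#   -> {'cmd': 'client_heartbeat',  'arg': {'sid': 'rp.session.0000'}}
+#   <- {'res': {'time': <timestamp>}}
+#   -> {'cmd': 'client_unregister', 'arg': {'sid': 'rp.session.0000'}}
+#   <- {'res': 'ok'}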
+# +# +# default error mode +# ------------------ +# +# To any request other than the above, the ZMQ bridge will respond: +# 'err': 'invalid request' +# +# ------------------------------------------------------------------------------ + +# ------------------------------------------------------------------------------ +# +class ZMQBridge(ru.zmq.Server): + + def __init__(self): + + self._lock = mt.Lock() + self._clients = dict() + + ru.zmq.Server.__init__(self) + + self._monitor = mt.Thread(target=self._monitor) + self._monitor.daemon = True + self._monitor.start() + + self.register_request('client_register', self._client_register) + self.register_request('client_lookup', self._client_lookup) + self.register_request('client_unregister', self._client_unregister) + self.register_request('client_heartbeat', self._client_heartbeat) + + + # -------------------------------------------------------------------------- + # + def _monitor(self): + + # this is a daemon thread - it never exits until process termination + while True: + + time.sleep(10) + now = time.time() + + # iterate w/o lock, and thus get a snapshot of the known sids + sids = list(self._clients.keys()) + + to_terminate = list() + for sid in sids: + + client = self._clients.get(sid) + if not client: + continue + + if now > (client['hb'] + _TIMEOUT): + self._log.warn('client %s timed out' % sid) + to_terminate.append(sid) + + if not to_terminate: + continue + + with self._lock: + + for sid in to_terminate: + + client = self._clients.get(sid) + if not client: + continue + + client['term'].set() + client['proc'].join() + del(self._clients[sid]) + + + # -------------------------------------------------------------------------- + # + def stop(self): + + for sid in self._clients: + self._clients[sid]['term'].set() + + ru.zmq.Server.stop(self) + + + # -------------------------------------------------------------------------- + # + def _client_register(self, arg): + + sid = arg['sid'] + + if sid in self._clients: + return {'err': 'client already registered'} + + q = mp.Queue() + term = mp.Event() + proc = mp.Process(target=self._worker, args=[sid, q, term]) + proc.start() + + try: + data = q.get(timeout=10) + except queue.Empty: + proc.terminate() + return {'err': 'worker startup failed'} + + self._clients[sid] = {'proc': proc, + 'term': term, + 'data': data, + 'hb' : time.time()} + + return {'res': self._clients[sid]['data']} + + + # -------------------------------------------------------------------------- + # + def _worker(self, sid, q, term): + + log = ru.Logger('radical.pilot.bridge', level='debug', path=sid) + + control_pubsub = None + state_pubsub = None + agent_queue = None + + try: + control_pubsub = ru.zmq.PubSub(cfg={'channel': 'control_pubsub', + 'uid' : 'control_pubsub', + 'type' : 'pubsub', + 'path' : sid}) + + state_pubsub = ru.zmq.PubSub(cfg={'channel': 'state_pubsub', + 'uid' : 'state_pubsub', + 'type' : 'pubsub', + 'path' : sid}) + + agent_queue = ru.zmq.Queue (cfg={'channel': 'agent_queue', + 'uid' : 'agent_queue', + 'type' : 'queue', + 'path' : sid}) + + control_pubsub.start() + state_pubsub.start() + agent_queue.start() + + data = {'control_pubsub' : {'pub': str(control_pubsub.addr_pub), + 'sub': str(control_pubsub.addr_sub)}, + 'state_pubsub' : {'pub': str(state_pubsub.addr_pub), + 'sub': str(state_pubsub.addr_sub)}, + 'agent_queue' : {'put': str(agent_queue._addr_put), + 'get': str(agent_queue._addr_get)}} + + # inform service about endpoint details + q.put(data) + + # we run forever until we receive a termination command 
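+            # (`term` is the mp.Event which `_client_register` handed to
+            #  this worker process -- the parent's monitor thread sets it
+            #  once the client's heartbeats time out)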
+ log.info('work') + term.wait() + + + except: + log.exception('worker failed') + + finally: + + if control_pubsub: control_pubsub.stop() + if state_pubsub : state_pubsub.stop() + if agent_queue : agent_queue.stop() + + log.info('terminated') + + + # -------------------------------------------------------------------------- + # + def _client_lookup(self, arg): + + sid = arg['sid'] + + with self._lock: + if sid not in self._clients: + return {'err': 'client not registered'} + + return {'res': self._clients[sid]['data']} + + + # -------------------------------------------------------------------------- + # + def _client_unregister(self, arg): + + sid = arg['sid'] + + with self._lock: + + if sid not in self._clients: + return {'err': 'client not registered'} + + self._clients[sid]['term'].set() + self._clients[sid]['proc'].join() + + del(self._clients[sid]) + + return {'res': 'ok'} + + + # -------------------------------------------------------------------------- + # + def _client_heartbeat(self, arg): + + sid = arg['sid'] + now = time.time() + + with self._lock: + + if sid not in self._clients: + return {'err': 'client not registered'} + + self._clients[sid]['hb'] = now + + return {'res': 'ok'} + + +# ------------------------------------------------------------------------------ +# +if __name__ == '__main__': + + bridge = ZMQBridge() + + try: + sys.stdout.write('uid : %s\n' % bridge.uid) + sys.stdout.flush() + + bridge.start() + + sys.stdout.write('addr: %s\n' % bridge.addr) + ru.write_json('%s.cfg' % bridge.uid, {'addr': bridge.addr}) + + # run forever until process is interrupted or killed + while True: + time.sleep(1) + + finally: + print('final') + bridge.stop() + print('stopped') + bridge.wait() + print('waited') + + +# ------------------------------------------------------------------------------ + diff --git a/tests/integration_tests/test_agent_bridge.py b/tests/integration_tests/test_agent_bridge.py new file mode 100755 index 0000000000..e099d1e0b0 --- /dev/null +++ b/tests/integration_tests/test_agent_bridge.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 + +import time + +import radical.utils as ru + + +# ------------------------------------------------------------------------------ +# +def test_agent_bridge(sid='foo'): + + bridge = ru.zmq.Client('server.0000') + + try: + print(bridge.request('client_register', {'sid': sid})) + print(bridge.request('client_lookup', {'sid': sid})) + time.sleep(3) + print(bridge.request('client_heartbeat', {'sid': sid})) + time.sleep(3) + print(bridge.request('client_heartbeat', {'sid': sid})) + time.sleep(3) + print(bridge.request('client_heartbeat', {'sid': sid})) + time.sleep(3) + print(bridge.request('client_heartbeat', {'sid': sid})) + time.sleep(3) + print(bridge.request('client_heartbeat', {'sid': sid})) + print(bridge.request('client_lookup', {'sid': sid})) + print(bridge.request('client_unregister', {'sid': sid})) + print(bridge.request('client_lookup', {'sid': sid})) + + finally: + bridge.close() + + +# ------------------------------------------------------------------------------ +# +if __name__ == '__main__': + + test_agent_bridge() + + +# ------------------------------------------------------------------------------ + From 6901652adbb2e3b6a9592619de04f2a81d4f8286 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Tue, 9 Mar 2021 18:02:17 +0100 Subject: [PATCH 013/171] add bin deployment --- setup.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 3e36425154..bfb11ee883 100755 --- a/setup.py 
+++ b/setup.py @@ -218,6 +218,10 @@ def run(self): 'packages' : find_namespace_packages('src', include=['radical.*']), 'package_dir' : {'': 'src'}, 'scripts' : [ + 'bin/radical-pilot-agent', + 'bin/radical-pilot-agent-bridge', + 'bin/radical-pilot-agent-funcs', + 'bin/radical-pilot-agent-statepush', 'bin/radical-pilot-bridge', 'bin/radical-pilot-bson2json', 'bin/radical-pilot-cleanup', @@ -236,9 +240,6 @@ def run(self): 'bin/radical-pilot-stats', 'bin/radical-pilot-stats.plot', 'bin/radical-pilot-version', - 'bin/radical-pilot-agent', - 'bin/radical-pilot-agent-funcs', - 'bin/radical-pilot-agent-statepush', 'bin/radical-pilot-worker', ], 'package_data' : {'': ['*.txt', '*.sh', '*.json', '*.gz', '*.c', From 1f593537fd9414c598bce4dd4f2365cd5b150238 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sat, 20 Mar 2021 17:44:56 +0100 Subject: [PATCH 014/171] no mongodb connection in primary session --- examples/02_failing_tasks.py | 2 +- examples/03_multiple_pilots.py | 2 +- examples/data_staging/io_staging_dict.py | 2 +- examples/data_staging/io_staging_pipeline.py | 2 +- examples/data_staging/io_staging_shared.py | 2 +- examples/data_staging/io_staging_simple.py | 2 +- examples/docs/chained_tasks.py | 2 +- examples/docs/coupled_tasks.py | 2 +- examples/docs/mpi_tasks.py | 2 +- examples/docs/simple_bot.py | 2 +- examples/docs/simple_bot_mult_res.py | 2 +- examples/getting_started_osg.py | 4 +- examples/getting_started_osg_2.py | 2 +- examples/misc/backfilling.py | 2 +- examples/misc/backfilling_recovery.py | 2 +- src/radical/pilot/configs/agent_cray.json | 4 - .../pilot/configs/agent_cray_aprun.json | 5 - src/radical/pilot/configs/agent_debug_sa.json | 2 - src/radical/pilot/configs/agent_default.json | 15 -- .../pilot/configs/agent_default_sa.json | 12 - src/radical/pilot/configs/agent_osg.json | 4 - src/radical/pilot/configs/agent_rhea.json | 4 - src/radical/pilot/configs/agent_scale.json | 12 - .../pilot/configs/agent_summit_sa.json | 4 - .../pilot/configs/session_default.json | 6 +- src/radical/pilot/constants.py | 2 - src/radical/pilot/pilot_manager.py | 10 +- src/radical/pilot/session.py | 237 ++++++---------- .../pilot/tmgr/staging_input/default.py | 2 +- src/radical/pilot/utils/component.py | 2 - src/radical/pilot/worker/__init__.py | 1 - src/radical/pilot/worker/update.py | 252 ------------------ tests/integration_tests/test_agent_bridge.py | 16 +- 33 files changed, 111 insertions(+), 511 deletions(-) delete mode 100644 src/radical/pilot/worker/update.py diff --git a/examples/02_failing_tasks.py b/examples/02_failing_tasks.py index e11cf0fb5e..20483fe5e7 100755 --- a/examples/02_failing_tasks.py +++ b/examples/02_failing_tasks.py @@ -142,7 +142,7 @@ # not. This will kill all remaining pilots. report.header('finalize') if session: - session.close(cleanup=False) + session.close() report.header() diff --git a/examples/03_multiple_pilots.py b/examples/03_multiple_pilots.py index 0b3f5bb0e0..26120b75b4 100755 --- a/examples/03_multiple_pilots.py +++ b/examples/03_multiple_pilots.py @@ -135,7 +135,7 @@ # always clean up the session, no matter if we caught an exception or # not. This will kill all remaining pilots. 
report.header('finalize') - session.close(cleanup=False) + session.close() report.header() diff --git a/examples/data_staging/io_staging_dict.py b/examples/data_staging/io_staging_dict.py index 2abaa711e3..403394839e 100755 --- a/examples/data_staging/io_staging_dict.py +++ b/examples/data_staging/io_staging_dict.py @@ -140,7 +140,7 @@ def task_state_cb (task, state): # the above is equivalent to # - # session.close (cleanup=True, terminate=True) + # session.close (terminate=True) # # it will thus both clean out the session's database record, and kill # all remaining pilots (none in our example). diff --git a/examples/data_staging/io_staging_pipeline.py b/examples/data_staging/io_staging_pipeline.py index 0eafd6b3df..cc8dc15315 100755 --- a/examples/data_staging/io_staging_pipeline.py +++ b/examples/data_staging/io_staging_pipeline.py @@ -160,7 +160,7 @@ def task_state_cb (task, state): # the above is equivalent to # - # session.close (cleanup=True, terminate=True) + # session.close (terminate=True) # # it will thus both clean out the session's database record, and kill # all remaining pilots (none in our example). diff --git a/examples/data_staging/io_staging_shared.py b/examples/data_staging/io_staging_shared.py index 880fa84b73..27c0ad4eca 100755 --- a/examples/data_staging/io_staging_shared.py +++ b/examples/data_staging/io_staging_shared.py @@ -139,7 +139,7 @@ # the above is equivalent to # - # session.close (cleanup=True, terminate=True) + # session.close (terminate=True) # # it will thus both clean out the session's database record, and kill # all remaining pilots (none in our example). diff --git a/examples/data_staging/io_staging_simple.py b/examples/data_staging/io_staging_simple.py index ef2802967f..331e997899 100755 --- a/examples/data_staging/io_staging_simple.py +++ b/examples/data_staging/io_staging_simple.py @@ -129,7 +129,7 @@ def task_state_cb (task, state): # the above is equivalent to # - # session.close (cleanup=True, terminate=True) + # session.close (terminate=True) # # it will thus both clean out the session's database record, and kill # all remaining pilots (none in our example). diff --git a/examples/docs/chained_tasks.py b/examples/docs/chained_tasks.py index f973421554..19fcdb4f9d 100755 --- a/examples/docs/chained_tasks.py +++ b/examples/docs/chained_tasks.py @@ -164,7 +164,7 @@ # the above is equivalent to # - # session.close (cleanup=True, terminate=True) + # session.close (terminate=True) # # it will thus both clean out the session's database record, and kill # all remaining pilots (none in our example). diff --git a/examples/docs/coupled_tasks.py b/examples/docs/coupled_tasks.py index aab6b6fad0..1bbd066302 100755 --- a/examples/docs/coupled_tasks.py +++ b/examples/docs/coupled_tasks.py @@ -174,7 +174,7 @@ # the above is equivalent to # - # session.close (cleanup=True, terminate=True) + # session.close (terminate=True) # # it will thus both clean out the session's database record, and kill # all remaining pilots (none in our example). diff --git a/examples/docs/mpi_tasks.py b/examples/docs/mpi_tasks.py index d67efb6064..8c0e910c3c 100755 --- a/examples/docs/mpi_tasks.py +++ b/examples/docs/mpi_tasks.py @@ -177,7 +177,7 @@ def task_state_cb (task, state): # the above is equivalent to # - # session.close (cleanup=True, terminate=True) + # session.close (terminate=True) # # it will thus both clean out the session's database record, and kill # all remaining pilots (none in our example). 
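All example changes in this patch drop the `cleanup` flag: without a session
database there is no session record to remove, so `close()` only decides
whether to terminate pilots and whether to download profiles and logfiles.
The resulting teardown idiom, as a sketch (assuming a configured service URL;
the `with` form relies on the `__enter__`/`__exit__` clause kept in
`session.py`):

import radical.pilot as rp

session = rp.Session()
try:
    pass          # create managers, submit tasks, wait ...
finally:
    # `terminate=True` is the default; `download` fetches profiles etc.
    session.close(terminate=True, download=False)

# equivalent, letting the context manager issue the close call:
with rp.Session() as session:
    pass          # ... workload ...
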
diff --git a/examples/docs/simple_bot.py b/examples/docs/simple_bot.py index ba53bd6d41..3d18e204d3 100755 --- a/examples/docs/simple_bot.py +++ b/examples/docs/simple_bot.py @@ -125,7 +125,7 @@ # the above is equivalent to # - # session.close (cleanup=True, terminate=True) + # session.close (terminate=True) # # it will thus both clean out the session's database record, and kill # all remaining pilots. diff --git a/examples/docs/simple_bot_mult_res.py b/examples/docs/simple_bot_mult_res.py index af522faa12..38cb9daf50 100644 --- a/examples/docs/simple_bot_mult_res.py +++ b/examples/docs/simple_bot_mult_res.py @@ -193,7 +193,7 @@ def task_state_cb (task, state): # the above is equivalent to # - # session.close (cleanup=True, terminate=True) + # session.close (terminate=True) # # it will thus both clean out the session's database record, and kill # all remaining pilots (none in our example). diff --git a/examples/getting_started_osg.py b/examples/getting_started_osg.py index f6d235bace..d74efab040 100755 --- a/examples/getting_started_osg.py +++ b/examples/getting_started_osg.py @@ -154,11 +154,11 @@ def wait_queue_size_cb(tmgr, wait_queue_size): # always clean up the session, no matter if we caught an exception or # not. print("closing session") - session.close (cleanup=False) + session.close () # the above is equivalent to # - # session.close (cleanup=True, terminate=True) + # session.close (terminate=True) # # it will thus both clean out the session's database record, and kill # all remaining pilots (none in our example). diff --git a/examples/getting_started_osg_2.py b/examples/getting_started_osg_2.py index 4156fdc2e0..42cf850e0b 100755 --- a/examples/getting_started_osg_2.py +++ b/examples/getting_started_osg_2.py @@ -196,7 +196,7 @@ def pilot_cb(pilot, state): report.header('finalize') report.header() if session: - session.close(cleanup=False) + session.close() report.header() diff --git a/examples/misc/backfilling.py b/examples/misc/backfilling.py index c661ec43e8..d2c88a6289 100755 --- a/examples/misc/backfilling.py +++ b/examples/misc/backfilling.py @@ -150,7 +150,7 @@ def task_state_cb (task, state): # the above is equivalent to # - # session.close (cleanup=True, terminate=True) + # session.close (terminate=True) # # it will thus both clean out the session's database record, and kill # all remaining pilots (none in our example). diff --git a/examples/misc/backfilling_recovery.py b/examples/misc/backfilling_recovery.py index 3cecdc3531..77287762ff 100755 --- a/examples/misc/backfilling_recovery.py +++ b/examples/misc/backfilling_recovery.py @@ -179,7 +179,7 @@ def wait_queue_size_cb(tmgr, wait_queue_size): # the above is equivalent to # - # session.close (cleanup=True, terminate=True) + # session.close (terminate=True) # # it will thus both clean out the session's database record, and kill # all remaining pilots (none in our example). diff --git a/src/radical/pilot/configs/agent_cray.json b/src/radical/pilot/configs/agent_cray.json index bbe309dc27..31413b4333 100644 --- a/src/radical/pilot/configs/agent_cray.json +++ b/src/radical/pilot/configs/agent_cray.json @@ -7,7 +7,6 @@ "bulk_time" : 1.0, "bulk_size" : 1024, - "db_poll_sleeptime" : 1.0, "network_interface" : "ipogif0", "heartbeat" : { @@ -59,9 +58,6 @@ }, "components": { - # the update worker must live in agent.0, since only that agent is - # sure to have connectivity toward the DB. 
- "update" : {"count" : 1}, "agent_staging_input" : {"count" : 1}, "agent_scheduling" : {"count" : 1}, "agent_staging_output" : {"count" : 1} diff --git a/src/radical/pilot/configs/agent_cray_aprun.json b/src/radical/pilot/configs/agent_cray_aprun.json index d71400ff5b..ec0fcce4c7 100644 --- a/src/radical/pilot/configs/agent_cray_aprun.json +++ b/src/radical/pilot/configs/agent_cray_aprun.json @@ -1,12 +1,8 @@ { "staging_schema" : "staging", "max_io_loglength" : 1024, - "db_poll_sleeptime" : 1.0, "network_interface" : "ipogif0", - "bulk_time" : 1.0, - "bulk_size" : 1024, - "heartbeat" : { "interval" : 1.0, "timeout" : 60.0 @@ -36,7 +32,6 @@ }, "components": { - "update" : {"count" : 1}, "agent_executing" : {"count" : 1} }, diff --git a/src/radical/pilot/configs/agent_debug_sa.json b/src/radical/pilot/configs/agent_debug_sa.json index b07a1431b7..1a7e5954f6 100644 --- a/src/radical/pilot/configs/agent_debug_sa.json +++ b/src/radical/pilot/configs/agent_debug_sa.json @@ -3,7 +3,6 @@ "staging_area" : "staging_area", "staging_schema" : "staging", "max_io_loglength" : 1024, - "db_poll_sleeptime" : 5.0, "bulk_time" : 10.0, "bulk_size" : 4096, @@ -65,7 +64,6 @@ }, "components" : { - "update" : {"count" : 1}, "agent_staging_input" : {"count" : 4}, "agent_scheduling" : {"count" : 1}, "agent_executing" : {"count" : 4}, diff --git a/src/radical/pilot/configs/agent_default.json b/src/radical/pilot/configs/agent_default.json index 2d9941d693..a61816aa12 100644 --- a/src/radical/pilot/configs/agent_default.json +++ b/src/radical/pilot/configs/agent_default.json @@ -4,15 +4,6 @@ # a functional pilot agent, without any component redundency. { - # max number of updates to put into a db bulk - "bulk_collection_size" : 1024, - - # max time period to collect db notifications into bulks (seconds) - "bulk_collection_time" : 1.0, - - # time to sleep between database polls (seconds) - "db_poll_sleeptime" : 2.0, - # agent.0 must always have target 'local' at this point # mode 'shared' : local node is also used for CUs # mode 'reserved' : local node is reserved for the agent @@ -20,9 +11,6 @@ "target" : "local", "mode" : "shared", - "bulk_time" : 1.0, - "bulk_size" : 1024, - "heartbeat" : { "interval" : 1.0, "timeout" : 60.0 @@ -52,9 +40,6 @@ }, "components" : { - # the update worker must live in agent.0, since only that agent is - # sure to have connectivity toward the DB. - # "update" : {"count" : 1}, "agent_staging_input" : {"count" : 1}, "agent_scheduling" : {"count" : 1}, "agent_executing" : {"count" : 1}, diff --git a/src/radical/pilot/configs/agent_default_sa.json b/src/radical/pilot/configs/agent_default_sa.json index a6c38c3299..d44bc7d607 100644 --- a/src/radical/pilot/configs/agent_default_sa.json +++ b/src/radical/pilot/configs/agent_default_sa.json @@ -4,15 +4,6 @@ # a functional pilot agent, without any component redundency. 
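The same cleanup runs through all agent configs: with the update worker gone,
the MongoDB era knobs (`db_poll_sleeptime`, `bulk_collection_size`,
`bulk_collection_time`, `bulk_time`, `bulk_size`) have no consumer left.  A
quick guard against reintroducing them, as a sketch (the
`'radical.pilot.agent'` config namespace is an assumption, mirroring how
session configs are loaded):

import radical.utils as ru

stale = {'db_poll_sleeptime', 'bulk_collection_size',
         'bulk_collection_time', 'bulk_time', 'bulk_size'}

cfg   = ru.Config('radical.pilot.agent', name='default')
found = stale & set(cfg.keys())

assert not found, 'stale config keys: %s' % found
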
{ - # max number of updates to put into a db bulk - "bulk_collection_size" : 1024, - - # max time period to collect db notifications into bulks (seconds) - "bulk_collection_time" : 1.0, - - # time to sleep between database polls (seconds) - "db_poll_sleeptime" : 2.0, - # agent.0 must always have target 'local' at this point # mode 'shared' : local node is also used for CUs # mode 'reserved' : local node is reserved for the agent @@ -20,9 +11,6 @@ "target" : "local", "mode" : "shared", - "bulk_time" : 1.0, - "bulk_size" : 1024, - "heartbeat" : { "interval" : 1.0, "timeout" : 60.0 diff --git a/src/radical/pilot/configs/agent_osg.json b/src/radical/pilot/configs/agent_osg.json index 42f1a84aa8..4d65fd3026 100644 --- a/src/radical/pilot/configs/agent_osg.json +++ b/src/radical/pilot/configs/agent_osg.json @@ -5,12 +5,8 @@ { "staging_schema" : "staging", "max_io_loglength" : 1024, - "db_poll_sleeptime" : 1.0, "network_interface" : "lo", - "bulk_time" : 1.0, - "bulk_size" : 1024, - "heartbeat" : { "interval" : 1.0, "timeout" : 60.0 diff --git a/src/radical/pilot/configs/agent_rhea.json b/src/radical/pilot/configs/agent_rhea.json index 257880fb9b..cc6991cc5d 100644 --- a/src/radical/pilot/configs/agent_rhea.json +++ b/src/radical/pilot/configs/agent_rhea.json @@ -1,12 +1,8 @@ { "staging_schema" : "staging", "max_io_loglength" : 1024, - "db_poll_sleeptime" : 1.0, "network_interface" : "ipogif0", - "bulk_time" : 1.0, - "bulk_size" : 1024, - "heartbeat" : { "interval" : 1.0, "timeout" : 60.0 diff --git a/src/radical/pilot/configs/agent_scale.json b/src/radical/pilot/configs/agent_scale.json index eb10ebb5d5..b978a2cda7 100644 --- a/src/radical/pilot/configs/agent_scale.json +++ b/src/radical/pilot/configs/agent_scale.json @@ -4,15 +4,6 @@ # a functional pilot agent, without any component redundency. { - # max number of updates to put into a db bulk - "bulk_collection_size" : 1024, - - # max time period to collect db notifications into bulks (seconds) - "bulk_collection_time" : 1.0, - - # time to sleep between database polls (seconds) - "db_poll_sleeptime" : 2.0, - # agent.0 must always have target 'local' at this point # mode 'shared' : local node is also used for CUs # mode 'reserved' : local node is reserved for the agent @@ -20,9 +11,6 @@ "target" : "local", "mode" : "shared", - "bulk_time" : 1.0, - "bulk_size" : 1024, - "heartbeat" : { "interval" : 1.0, "timeout" : 60.0 diff --git a/src/radical/pilot/configs/agent_summit_sa.json b/src/radical/pilot/configs/agent_summit_sa.json index 0d25bef79a..7229f31497 100644 --- a/src/radical/pilot/configs/agent_summit_sa.json +++ b/src/radical/pilot/configs/agent_summit_sa.json @@ -6,12 +6,8 @@ { "staging_schema" : "staging", "max_io_loglength" : 1024, - "db_poll_sleeptime" : 5.0, # "network_interface" : "ipogif0", - "bulk_time" : 1.0, - "bulk_size" : 1024, - "heartbeat" : { "interval" : 1.0, "timeout" : 60.0 diff --git a/src/radical/pilot/configs/session_default.json b/src/radical/pilot/configs/session_default.json index b35bc5039e..bb2a4ca985 100644 --- a/src/radical/pilot/configs/session_default.json +++ b/src/radical/pilot/configs/session_default.json @@ -4,13 +4,10 @@ # a functional rp session, both on the client and on the agent side. 
{ "default_dburl": "mongodb://rct:rct_test@two.radical-project.org/rct_test", - "dburl" : "${RADICAL_PILOT_DBURL}", + "service_url" : "${RADICAL_PILOT_SERVICE_URL}", "session_base" : "${RADICAL_PILOT_SESSION_BASE:$PWD}", "record" : "${RADICAL_PILOT_SESSION_RECORD}", - "bulk_time" : 1.0, - "bulk_size" : 1024, - "heartbeat" : { "interval" : 1.0, "timeout" : 60.0 @@ -27,7 +24,6 @@ "components" : { # how many instances of the respective components should be started - "update" : {"count": 1}, "stager" : {"count": 1} } } diff --git a/src/radical/pilot/constants.py b/src/radical/pilot/constants.py index ca54678b84..59aef32bc4 100644 --- a/src/radical/pilot/constants.py +++ b/src/radical/pilot/constants.py @@ -6,8 +6,6 @@ MASTER = 'master' WORKER = 'worker' -UPDATE_WORKER = 'update' - STAGER_WORKER = 'stager' STAGER_REQUEST_QUEUE = 'stager_request_queue' STAGER_RESPONSE_PUBSUB = 'stager_response_pubsub' diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py index bb8455689b..38886e14aa 100644 --- a/src/radical/pilot/pilot_manager.py +++ b/src/radical/pilot/pilot_manager.py @@ -339,7 +339,7 @@ def _control_sub_cb(self, topic, msg): cmd = msg['cmd'] arg = msg['arg'] - self._log.debug('=== got control cmd %s: %s', cmd, arg) + self._log.debug('got control cmd %s: %s', cmd, arg) if cmd == 'pilot_activate': pilot = arg['pilot'] @@ -372,16 +372,16 @@ def _update_pilot(self, pilot_dict, publish=False, advance=True): return target, passed = rps._pilot_state_progress(pid, current, target) - self._log.debug('=== %s current: %s', pid, current) - self._log.debug('=== %s target : %s', pid, target ) - self._log.debug('=== %s passed : %s', pid, passed ) + # self._log.debug('%s current: %s', pid, current) + # self._log.debug('%s target : %s', pid, target ) + # self._log.debug('%s passed : %s', pid, passed ) if target in [rps.CANCELED, rps.FAILED]: # don't replay intermediate states passed = passed[-1:] for s in passed: - self._log.debug('=== %s advance: %s', pid, s ) + self._log.debug('%s advance: %s', pid, s ) # we got state from either pubsub or DB, so don't publish again. # we also don't need to maintain bulks for that reason. pilot_dict['state'] = s diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 22444c7b0c..9f951f6a41 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -4,7 +4,6 @@ __license__ = "MIT" import os -import sys import copy import radical.utils as ru @@ -13,7 +12,6 @@ import radical.saga.utils.pty_shell as rsup from .constants import RESOURCE_CONFIG_LABEL_DEFAULT -from .db import DBSession from . import utils as rpu @@ -44,31 +42,30 @@ class Session(rs.Session): # -------------------------------------------------------------------------- # - def __init__(self, dburl=None, uid=None, cfg=None, _primary=True): + def __init__(self, service_url=None, uid=None, cfg=None, _primary=True): ''' Creates a new session. A new Session instance is created and stored in the database. **Arguments:** - * **dburl** (`string`): The MongoDB URL. If none is given, - RP uses the environment variable RADICAL_PILOT_DBURL. If that is - not set, an error will be raised. - + * **service_url** (`string`): The Bridge Service URL. + If none is given, RP uses the environment variable + RADICAL_PILOT_SERVICE_URL. If that is not set, an error will be + raised. * **cfg** (`str` or `dict`): a named or instantiated configuration to be used for the session. * **uid** (`string`): Create a session with this UID. 
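The pilot update path above replays each intermediate state which
`rps._pilot_state_progress` reports as passed, but collapses the replay to
the last state when the target is final (`CANCELED`, `FAILED`).  A toy
illustration of the replay rule, with an assumed linear state model:

ORDER = ['NEW', 'PMGR_LAUNCHING', 'PMGR_ACTIVE', 'DONE']

def passed_states(current, target):
    # states between `current` (exclusive) and `target` (inclusive)
    return ORDER[ORDER.index(current) + 1 : ORDER.index(target) + 1]

# a jump from NEW to PMGR_ACTIVE also replays the launching state
assert passed_states('NEW', 'PMGR_ACTIVE') == ['PMGR_LAUNCHING',
                                               'PMGR_ACTIVE']
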
Session UIDs MUST be unique - otherwise they will lead to conflicts in the underlying database, resulting in undefined behaviours (or worse). - * **_primary** (`bool`): only sessions created by the original - application process (via `rp.Session()`, will connect to the DB. + application process (via `rp.Session()`, will create comm bridges Secondary session instances are instantiated internally in processes spawned (directly or indirectly) by the initial session, for example in some of it's components. A secondary session will inherit the original session ID, but will not attempt to create - a new DB collection - if such a DB connection is needed, the - component needs to establish that on its own. + a new comm bridge - if such a bridge connection is needed, the + component will connect to the one created by the primary session. ''' # NOTE: `name` and `cfg` are overloaded, the user cannot point to @@ -79,7 +76,7 @@ def __init__(self, dburl=None, uid=None, cfg=None, _primary=True): name = cfg cfg = None - self._dbs = None + self._service = None self._closed = False self._primary = _primary @@ -90,28 +87,22 @@ def __init__(self, dburl=None, uid=None, cfg=None, _primary=True): self._cfg = ru.Config('radical.pilot.session', name=name, cfg=cfg) self._rcfgs = ru.Config('radical.pilot.resource', name='*', expand=False) - if _primary: + pwd = os.getcwd() - pwd = os.getcwd() - - if not self._cfg.sid: - if uid: - self._cfg.sid = uid - else: - self._cfg.sid = ru.generate_id('rp.session', - mode=ru.ID_PRIVATE) - if not self._cfg.base: - self._cfg.base = pwd + if not self._cfg.sid: + if uid: + self._cfg.sid = uid + else: + self._cfg.sid = ru.generate_id('rp.session', + mode=ru.ID_PRIVATE) + if not self._cfg.base: + self._cfg.base = pwd - if not self._cfg.path: - self._cfg.path = '%s/%s' % (self._cfg.base, self._cfg.sid) + if not self._cfg.path: + self._cfg.path = '%s/%s' % (self._cfg.base, self._cfg.sid) - if not self._cfg.client_sandbox: - self._cfg.client_sandbox = pwd - - else: - for k in ['sid', 'base', 'path']: - assert(k in self._cfg), 'non-primary session misses %s' % k + if not self._cfg.client_sandbox: + self._cfg.client_sandbox = pwd # change RU defaults to point logfiles etc. 
to the session sandbox def_cfg = ru.DefaultConfig() @@ -131,7 +122,7 @@ def __init__(self, dburl=None, uid=None, cfg=None, _primary=True): self._log.info('radical.saga version: %s' % rs.version_detail) self._log.info('radical.utils version: %s' % ru.version_detail) - self._prof.prof('session_start', uid=self._uid, msg=int(_primary)) + self._prof.prof('session_start', uid=self._uid) # now we have config and uid - initialize base class (saga session) rs.Session.__init__(self, uid=self._uid) @@ -145,53 +136,34 @@ def __init__(self, dburl=None, uid=None, cfg=None, _primary=True): 'js_shells' : dict(), 'fs_dirs' : dict()} - if _primary: - self._initialize_primary(dburl) - - # at this point we have a DB connection, logger, etc, and are done - self._prof.prof('session_ok', uid=self._uid, msg=int(_primary)) - - - # -------------------------------------------------------------------------- - def _initialize_primary(self, dburl): - self._rep.info ('<>err\n") - self._log.exception('session create failed [%s]' % - dburl_no_passwd) - raise RuntimeError ('session create failed [%s]' % - dburl_no_passwd) from e + # create/connect bridge handle on primary sessions + self._service = ru.zmq.Client(url=service_url) + self._cfg.bridge = self._service.request('client_register', + {'sid': self._uid}) + self._log.debug('=== bridge: %s', self._cfg.bridge) + + # FIXME MONGODB: to json + # self.inject_metadata({'radical_stack': + # {'rp': rp_version_detail, + # 'rs': rs.version_detail, + # 'ru': ru.version_detail, + # 'py': py_version_detail}}) # primary sessions have a component manager which also manages # heartbeat. 'self._cmgr.close()` should be called during termination @@ -210,16 +182,18 @@ def _initialize_primary(self, dburl): # create recording path and record session os.system('mkdir -p %s' % self._rec) - ru.write_json({'dburl': str(self.dburl)}, + ru.write_json({'service_url': str(self.service_url)}, "%s/session.json" % self._rec) self._log.info("recording session in %s" % self._rec) + # at this point we have a DB connection, logger, etc, and are done + self._prof.prof('session_ok', uid=self._uid) self._rep.ok('>>ok\n') # -------------------------------------------------------------------------- # context manager `with` clause - # FIXME: cleanup_on_close, terminate_on_close attributes? + # FIXME: terminate_on_close attributes? # def __enter__(self): return self @@ -230,16 +204,13 @@ def __exit__(self, exc_type, exc_value, traceback): # -------------------------------------------------------------------------- # - def close(self, cleanup=False, terminate=True, download=False): + def close(self, terminate=True, download=False): ''' Closes the session. All subsequent attempts access objects attached to - the session will result in an error. If cleanup is set to True, - the session data is removed from the database. + the session will result in an error. **Arguments:** - * **cleanup** (`bool`): - Remove session from MongoDB (implies * terminate) * **terminate** (`bool`): Shut down all pilots associated with the session. 
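The `client_register` request which the primary session now sends returns the
endpoint map created by the bridge worker, so `self._cfg.bridge` ends up
holding one address pair per channel.  At this point in the series the reply
looks roughly like this (hosts and ports are illustrative):

bridge = {
    'control_pubsub': {'pub': 'tcp://host:10001', 'sub': 'tcp://host:10002'},
    'state_pubsub'  : {'pub': 'tcp://host:10003', 'sub': 'tcp://host:10004'},
    'agent_queue'   : {'put': 'tcp://host:10005', 'get': 'tcp://host:10006'},
}
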
* **download** (`bool`): @@ -256,11 +227,7 @@ def close(self, cleanup=False, terminate=True, download=False): self._prof.prof("session_close", uid=self._uid) # set defaults - if cleanup is None: cleanup = True - if terminate is None: terminate = True - - if cleanup: - # cleanup implies terminate + if terminate is None: terminate = True for tmgr_uid, tmgr in self._tmgrs.items(): @@ -276,11 +243,15 @@ def close(self, cleanup=False, terminate=True, download=False): if self._cmgr: self._cmgr.close() - if self._dbs: - self._log.debug("session %s closes db (%s)", self._uid, cleanup) - self._dbs.close(delete=cleanup) + if self._service: + try: + self._log.debug("session %s closes service", self._uid) + self._service.request('client_unregister', + {'sid': self._uid}) + except: + pass - self._log.debug("session %s closed (delete=%s)", self._uid, cleanup) + self._log.debug("session %s closed", self._uid) self._prof.prof("session_stop", uid=self._uid) self._prof.close() @@ -311,12 +282,12 @@ def as_dict(self): ''' object_dict = { - "uid" : self._uid, - "created" : self.created, - "connected" : self.connected, - "closed" : self.closed, - "dburl" : str(self.dburl), - "cfg" : copy.deepcopy(self._cfg) + "uid" : self._uid, + "created" : self.created, + "connected" : self.connected, + "closed" : self.closed, + "service_url": str(self.service_url), + "cfg" : copy.deepcopy(self._cfg) } return object_dict @@ -353,23 +324,8 @@ def path(self): # -------------------------------------------------------------------------- # @property - def dburl(self): - return self._cfg.dburl - - - # -------------------------------------------------------------------------- - # - def get_db(self): - - if self._dbs: return self._dbs.get_db() - else : return None - - - # -------------------------------------------------------------------------- - # - @property - def primary(self): - return self._primary + def service_url(self): + return self._cfg.service_url # -------------------------------------------------------------------------- @@ -383,51 +339,9 @@ def cfg(self): # @property def cmgr(self): - assert(self._primary) return self._cmgr - # -------------------------------------------------------------------------- - # - @property - def created(self): - '''Returns the UTC date and time the session was created. 
- ''' - if self._dbs: return self._dbs.created - else : return None - - - # -------------------------------------------------------------------------- - # - @property - def connected(self): - ''' - Return time when the session connected to the DB - ''' - - if self._dbs: return self._dbs.connected - else : return None - - - # -------------------------------------------------------------------------- - # - @property - def is_connected(self): - - return self._dbs.is_connected - - - # -------------------------------------------------------------------------- - # - @property - def closed(self): - ''' - Returns the time of closing - ''' - if self._dbs: return self._dbs.closed - else : return None - - # -------------------------------------------------------------------------- # def _get_logger(self, name, level=None): @@ -477,17 +391,17 @@ def inject_metadata(self, metadata): if not isinstance(metadata, dict): raise Exception("Session metadata should be a dict!") - if self._dbs and self._dbs._c: - self._dbs._c.update({'type' : 'session', - "uid" : self.uid}, - {"$push" : {"metadata": metadata}}) + # FIXME MONGODB: to json + # if self._dbs and self._dbs._c: + # self._dbs._c.update({'type' : 'session', + # "uid" : self.uid}, + # {"$push" : {"metadata": metadata}}) # -------------------------------------------------------------------------- # def _register_pmgr(self, pmgr): - self._dbs.insert_pmgr(pmgr.as_dict()) self._pmgrs[pmgr.uid] = pmgr @@ -537,7 +451,6 @@ def get_pilot_managers(self, pmgr_uids=None): # def _register_tmgr(self, tmgr): - self._dbs.insert_tmgr(tmgr.as_dict()) self._tmgrs[tmgr.uid] = tmgr @@ -689,7 +602,7 @@ def get_resource_config(self, resource, schema=None): # def fetch_profiles(self, tgt=None, fetch_client=False): - return rpu.fetch_profiles(self._uid, dburl=self.dburl, tgt=tgt, + return rpu.fetch_profiles(self._uid, service_url=self.service_url, tgt=tgt, session=self) @@ -697,7 +610,7 @@ def fetch_profiles(self, tgt=None, fetch_client=False): # def fetch_logfiles(self, tgt=None, fetch_client=False): - return rpu.fetch_logfiles(self._uid, dburl=self.dburl, tgt=tgt, + return rpu.fetch_logfiles(self._uid, service_url=self.service_url, tgt=tgt, session=self) @@ -705,7 +618,7 @@ def fetch_logfiles(self, tgt=None, fetch_client=False): # def fetch_json(self, tgt=None, fetch_client=False): - return rpu.fetch_json(self._uid, dburl=self.dburl, tgt=tgt, + return rpu.fetch_json(self._uid, service_url=self.service_url, tgt=tgt, session=self) diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py index e1b0308a73..a2ab54b72d 100644 --- a/src/radical/pilot/tmgr/staging_input/default.py +++ b/src/radical/pilot/tmgr/staging_input/default.py @@ -107,7 +107,7 @@ def _control_cb(self, topic, msg): elif cmd == 'pilot_register': pid = arg['pid'] - self._log.debug('=== register pilot %s', pid) + self._log.debug('register pilot %s', pid) if pid not in self._connected: self._connected.append(pid) diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index 9fe9cdc92a..c94791fad5 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -520,8 +520,6 @@ def create(cfg, session): # from radical.pilot import constants as rpc comp = { - rpc.WORKER : rpt.Worker, - rpc.UPDATE_WORKER : rpw.Update, rpc.STAGER_WORKER : rpw.Stager, rpc.PMGR_LAUNCHING_COMPONENT : rppm.Launching, diff --git a/src/radical/pilot/worker/__init__.py b/src/radical/pilot/worker/__init__.py index 
f71925bd9f..7ba678592d 100644 --- a/src/radical/pilot/worker/__init__.py +++ b/src/radical/pilot/worker/__init__.py @@ -2,7 +2,6 @@ __copyright__ = "Copyright 2016, http://radical.rutgers.edu" __license__ = "MIT" -from .update import Update from .stager import Stager diff --git a/src/radical/pilot/worker/update.py b/src/radical/pilot/worker/update.py deleted file mode 100644 index f04eb55ee7..0000000000 --- a/src/radical/pilot/worker/update.py +++ /dev/null @@ -1,252 +0,0 @@ - -__copyright__ = "Copyright 2016, http://radical.rutgers.edu" -__license__ = "MIT" - - -import sys -import time -import pymongo - -import radical.utils as ru - -from .. import utils as rpu -from .. import constants as rpc - -from ..db import DBSession - - - -def out(msg): - sys.stdout.write('%s\n' % msg) - sys.stdout.flush() - - -# ------------------------------------------------------------------------------ -# -DEFAULT_BULK_COLLECTION_TIME = 1.0 # seconds -DEFAULT_BULK_COLLECTION_SIZE = 100 # seconds - - -# ------------------------------------------------------------------------------ -# -class Update(rpu.Worker): - ''' - An UpdateWorker pushes Task and Pilot state updates to mongodb. Its instances - compete for update requests on the update_queue. Those requests will be - triplets of collection name, query dict, and update dict. Update requests - will be collected into bulks over some time (BULK_COLLECTION_TIME) and - number (BULK_COLLECTION_SIZE) to reduce number of roundtrips. - ''' - - # -------------------------------------------------------------------------- - # - def __init__(self, cfg, session): - - rpu.Worker.__init__(self, cfg, session) - - - # -------------------------------------------------------------------------- - # - def initialize(self): - - self._sid = self._cfg['sid'] - self._dburl = self._cfg['dburl'] - - # get db handle from a connected, non-primary session - self._dbs = DBSession(self._sid, self._dburl, {}, self._log, connect=True) - self._coll = self._dbs._c - self._bulk = self._coll.initialize_ordered_bulk_op() - self._last = time.time() # time of last bulk push - self._uids = list() # list of collected uids - self._lock = ru.Lock() # protect _bulk - - self._bulk_time = self._cfg.bulk_time - self._bulk_size = self._cfg.bulk_size - - self.register_subscriber(rpc.STATE_PUBSUB, self._state_cb) - self.register_timed_cb(self._idle_cb, timer=self._bulk_time) - - - # -------------------------------------------------------------------------- - # - @classmethod - def create(cls, cfg, session): - - return cls(cfg, session) - - - # -------------------------------------------------------------------------- - # - def _timed_bulk_execute(self, flush=False): - - # is there anything to execute? 
- if not self._uids: - return True - - now = time.time() - age = now - self._last - - # only push if flush is forced, or when collection time or size - # have been exceeded - if not flush \ - and age < self._bulk_time \ - and len(self._uids) < self._bulk_size: - return False - - try: - self._bulk.execute() - - except pymongo.errors.OperationFailure as e: - self._log.exception('bulk exec error: %s' % e.details) - raise - - except Exception as e: - self._log.exception('mongodb error: %s', e) - raise - - self._prof.prof('update_pushed', msg='bulk size: %d' % len(self._uids)) - - # for entry in self._uids: - # - # uid = entry[0] - # state = entry[2] - # - # if state: - # self._prof.prof('update_pushed', uid=uid, msg=state) - # else: - # self._prof.prof('update_pushed', uid=uid) - - # empty bulk, refresh state - self._last = now - self._bulk = self._coll.initialize_ordered_bulk_op() - self._uids = list() - - return True - - - # -------------------------------------------------------------------------- - # - def _idle_cb(self): - - with self._lock: - self._timed_bulk_execute() - - return True - - - # -------------------------------------------------------------------------- - # - def _state_cb(self, topic, msg): - ''' - - # FIXME: this documentation is not final, nor does it reflect reality! - - 'msg' is expected to be of the form ['cmd', 'thing'], where 'thing' is - an entity to update in the DB, and 'cmd' specifies the mode of update. - - 'things' are expected to be dicts with a 'type' and 'uid' field. If - either one does not exist, an exception is raised. - - Supported types are: - - - task - - pilot - - supported 'cmds': - - - delete : delete can be delayed until bulk is collected/flushed - - update : update can be delayed until bulk is collected/flushed - - state : update can be delayed until bulk is collected/flushed - only state and state history are updated - - delete_flush: delete is sent immediately (possibly in a bulk) - - update_flush: update is sent immediately (possibly in a bulk) - - state_flush : update is sent immediately (possibly in a bulk) - only state and state history are updated - - flush : flush pending bulk - - The 'thing' can contains '$set' and '$push' fields, which will then be - used as given. For all other fields, we use the following convention: - - - scalar values: use '$set' - - dict values: use '$set' - - list values: use '$push' - - That implies that all potential 'list' types should be defined in the - initial 'thing' insert as such, as (potentially empty) lists. - - For 'cmd' in ['state', 'state_flush'], only the 'uid' and 'state' fields - of the given 'thing' are used, all other fields are ignored. If 'state' - does not exist, an exception is raised. - ''' - - try: - cmd = msg['cmd'] - things = msg['arg'] - - # cmds = ['delete', 'update', 'state', - # 'delete_flush', 'update_flush', 'state_flush', 'flush'] - if cmd not in ['update', 'insert']: - return True - - if cmd == 'insert': - self._dbs.insert_units(ru.as_list(things)) - return True - - - # FIXME: we don't have any error recovery -- any failure to update - # state in the DB will thus result in an exception here and tear - # down the module. - for thing in ru.as_list(things): - - # got a new request. Add to bulk (create as needed), - # and push bulk if time is up. 
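The deleted worker's flush policy is the part worth remembering wherever
updates get batched: collect items, and push once either a time budget since
the last flush or a size threshold is exceeded.  The same policy as a generic
sketch (names are illustrative; the defaults mirror the constants above):

import time


class BulkBuffer:

    def __init__(self, flush_cb, bulk_time=1.0, bulk_size=100):

        self._flush_cb  = flush_cb   # called with the collected items
        self._bulk_time = bulk_time  # max seconds between flushes
        self._bulk_size = bulk_size  # max items per bulk
        self._items     = list()
        self._last      = time.time()

    def add(self, item, flush=False):

        self._items.append(item)
        now = time.time()

        if flush or now - self._last >= self._bulk_time \
                 or len(self._items) >= self._bulk_size:
            self._flush_cb(self._items)
            self._items = list()
            self._last  = now
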
- uid = thing['uid'] - ttype = thing['type'] - state = thing['state'] - - if 'clone' in uid: - # we don't push clone states to DB - return True - - # self._prof.prof('update_request', msg=state, uid=uid) - - if not state: - # nothing to push - return True - - # create an update document - update_dict = dict() - update_dict['$set'] = dict() - update_dict['$push'] = dict() - - for key,val in thing.items(): - # never set _id, states (to avoid index clash, doubled ops) - if key not in ['_id', 'states', 'cmds']: - update_dict['$set'][key] = val - - # we set state, put (more importantly) we push the state onto - # the 'states' list, so that we can later get state progression - # in sync with the state model, even if they have been pushed - # here out-of-order - update_dict['$push']['states'] = state - - with self._lock: - - # push the update request onto the bulk - self._uids.append([uid, ttype, state]) - self._bulk.find ({'uid' : uid, - 'type': ttype}) \ - .update(update_dict) - - with self._lock: - # attempt a timed update - self._timed_bulk_execute() - - return True - - except: - return False - - -# ------------------------------------------------------------------------------ - diff --git a/tests/integration_tests/test_agent_bridge.py b/tests/integration_tests/test_agent_bridge.py index e099d1e0b0..7dd5f76e84 100755 --- a/tests/integration_tests/test_agent_bridge.py +++ b/tests/integration_tests/test_agent_bridge.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import sys import time import radical.utils as ru @@ -7,9 +8,14 @@ # ------------------------------------------------------------------------------ # -def test_agent_bridge(sid='foo'): +def test_agent_bridge(url=None): - bridge = ru.zmq.Client('server.0000') + if url: + bridge = ru.zmq.Client(url=url) + else: + bridge = ru.zmq.Client(server='server.0000') + + sid = 'foo' try: print(bridge.request('client_register', {'sid': sid})) @@ -36,7 +42,11 @@ def test_agent_bridge(sid='foo'): # if __name__ == '__main__': - test_agent_bridge() + url = None + if len(sys.argv) > 1: + url = sys.argv[1] + + test_agent_bridge(url) # ------------------------------------------------------------------------------ From 2e3f84e0edf9e520694a5abe7b1d44d551de1b1d Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 22 Mar 2021 20:50:45 +0100 Subject: [PATCH 015/171] snapshot --- TODO | 7 + bin/radical-pilot-agent-statepush | 84 --- ...gent-bridge => radical-pilot-proxy-server} | 110 ++-- examples/01_task_details.py | 9 +- src/radical/pilot/agent/agent_0.py | 496 +++++------------- src/radical/pilot/agent/bootstrap_0.sh | 11 - .../pilot/agent/staging_output/default.py | 1 + .../pilot/configs/session_default.json | 1 - src/radical/pilot/constants.py | 4 + src/radical/pilot/pilot.py | 4 +- src/radical/pilot/pilot_manager.py | 56 +- src/radical/pilot/pmgr/launching/default.py | 23 +- src/radical/pilot/session.py | 203 +++++-- src/radical/pilot/task_manager.py | 166 +----- src/radical/pilot/tmgr/scheduler/base.py | 8 +- .../pilot/tmgr/staging_input/default.py | 31 +- .../pilot/tmgr/staging_output/default.py | 4 +- src/radical/pilot/utils/component.py | 63 ++- src/radical/pilot/utils/prof_utils.py | 11 +- src/radical/pilot/utils/session.py | 49 +- src/radical/pilot/worker/stager.py | 1 - 21 files changed, 465 insertions(+), 877 deletions(-) delete mode 100755 bin/radical-pilot-agent-statepush rename bin/{radical-pilot-agent-bridge => radical-pilot-proxy-server} (73%) diff --git a/TODO b/TODO index 92466075b3..f861504423 100644 --- a/TODO +++ b/TODO @@ -252,6 +252,13 @@ 
term iv - Exception in SA startup causes hang (agent_0 does not die) +others +------ - `PRTE` switches in scheduler should become `partition` switches, where the partitions are defined by the RM + - stager and other RPC like workers should get a proper async RPC channel + (req/res). That should be built upon `ru.zmq.Service`. The client side + should wrap the request into a proper async Future. + + diff --git a/bin/radical-pilot-agent-statepush b/bin/radical-pilot-agent-statepush deleted file mode 100755 index 90976cab2c..0000000000 --- a/bin/radical-pilot-agent-statepush +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python - -''' -This utility will push a pilot state update to MongoDB. Its purpose is to delay -that state update until after all profiles and logfiles have been closed and -packaged, so that the client side can rely on the state update to signal that. -A state update in the agent process itself wold necessarily happen before -control is returned to the bootstrapper, and thus before the bootstrapper has -a chance to pack up the profiles, which results in a race with the client to -pull those packages. - -Note that this tool assumes a specific session straucture in the DB, and thus -needs to be kept in sync with the respective RP code. -''' - -import os -import sys -import time - -import radical.utils as ru - - -# ------------------------------------------------------------------------------ -# -if __name__ == '__main__': - - sid = None - pid = None - state = None - cfg = None - dburl = None - - if len(sys.argv) == 3: - json = sys.argv[1] - state = sys.argv[2] - - cfg = ru.read_json(json) - sid = cfg['sid'] - pid = cfg['pid'] - dburl = cfg['dburl'] - hostport = os.environ.get('RADICAL_PILOT_DB_HOSTPORT') - - elif len(sys.argv) == 4: - sid = sys.argv[1] - pid = sys.argv[2] - state = sys.argv[3] - dburl = os.environ['RADICAL_PILOT_DBURL'] - hostport = os.environ.get('RADICAL_PILOT_DB_HOSTPORT') - - else: - raise ValueError('need sid, pid and state arguments %s' % sys.argv[1:]) - - - print('dburl : %s' % dburl) - print('tunnel : %s' % hostport) - - if hostport: - dburl = ru.Url(dburl) - dburl.host, dburl.port = hostport.split(':') - print('dburl[t]: %s' % dburl) - - print('session : %s' % sid) - print('pilot : %s' % pid) - print('state : %s' % pid) - - # mpongodb_connect wants a string at the moment - mongo, db, _, _, _ = ru.mongodb_connect(str(dburl)) - - if not mongo or not db: - raise RuntimeError('Could not connect to database at %s' % dburl) - - coll = db[sid] - ret = coll.update({'type' : 'pilot', - 'uid' : pid}, - {'$push': {'states' : state}, - '$set' : {'state' : state, - 'finished': time.time()} - }) - - print('update : %s' % ret) - - -# ------------------------------------------------------------------------------ - diff --git a/bin/radical-pilot-agent-bridge b/bin/radical-pilot-proxy-server similarity index 73% rename from bin/radical-pilot-agent-bridge rename to bin/radical-pilot-proxy-server index 955b1befe7..f84c535b0e 100755 --- a/bin/radical-pilot-agent-bridge +++ b/bin/radical-pilot-proxy-server @@ -25,14 +25,12 @@ _HIGH_WATER_MARK = 0 # number of messages to buffer before dropping # agent connections, identified by a common session ID. 
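The client side half of the new TODO entry (wrap requests into futures) can
be prototyped on top of the blocking `ru.zmq.Client.request` call, while the
service side would build on `ru.zmq.Service` as noted above.  A sketch under
that assumption (a single pool worker, since the one underlying req/rep
socket must not be shared across threads; the class name is illustrative):

import concurrent.futures as cf

import radical.utils as ru


class AsyncRPCClient:

    def __init__(self, url):

        self._client = ru.zmq.Client(url=url)

        # one worker serializes all requests on the single client socket
        self._pool   = cf.ThreadPoolExecutor(max_workers=1)

    def request(self, cmd, arg):

        # returns a future; `fut.result()` blocks until the reply arrives
        return self._pool.submit(self._client.request, cmd, arg)


# usage:
#   rpc = AsyncRPCClient(url='tcp://localhost:10000')
#   fut = rpc.request('client_lookup', {'sid': 'foo'})
#   ...                                  # overlap other work
#   print(fut.result())
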
A client connection # will trigger the creation of the following communication channels: # -# - control_pubsub_bridge +# - proxy_control_pubsub_bridge # links client and agent control pubsubs (includes heartbeat) -# - state_pubsub_bridge +# - proxy_state_pubsub_bridge # forwards task state updates from agents to client -# - agent_input_queue -# forwards tasks from the client to the agents -# - agent_output_queue -# forwards tasks from the agents to the client +# - proxy_task_queue +# forwards tasks from the client to the agents and vice versa # # # The protocol on the `bridge_request` channel is as follows: @@ -45,10 +43,9 @@ _HIGH_WATER_MARK = 0 # number of messages to buffer before dropping # 'arg': 'sid': # # reply: -# 'res': {'control_pubsub' : {'sub': , 'pub': }, -# 'state_pubsub' : {'sub': , 'pub': }, -# 'agent_input_queue' : {'put': , 'get': }, -# 'agent_output_queue': {'put': , 'get': }} +# 'res': {'proxy_control_pubsub': {'sub': , 'pub': }, +# 'proxy_state_pubsub' : {'sub': , 'pub': }, +# 'proxy_task_queue' : {'put': , 'get': }} # # notes: # - the request will fail if the session ID is known from another @@ -60,8 +57,8 @@ _HIGH_WATER_MARK = 0 # number of messages to buffer before dropping # - see semantics of the 'client_unregister' request for details. # - the same termination semantics holds for the 'client_unregister' # request. -# - any agent queues which exist for that session at the time of -# termination will also be closed, disregarding ay data hekd in those +# - any task queues which exist for that session at the time of +# termination will also be closed, disregarding any data held in those # queues. # # @@ -73,10 +70,9 @@ _HIGH_WATER_MARK = 0 # number of messages to buffer before dropping # 'arg': 'sid': # # reply: -# 'res': {'control_pubsub' : {'sub': , 'pub': }, -# 'state_pubsub' : {'sub': , 'pub': }, -# 'agent_input_queue' : {'put': , 'get': }, -# 'agent_output_queue': {'put': , 'get': }} +# 'res': {'proxy_control_pubsub': {'sub': , 'pub': }, +# 'proxy_state_pubsub' : {'sub': , 'pub': }, +# 'proxy_task_queue' : {'put': , 'get': }} # # notes: # - the request will fail if the session ID is not registered (anymore) @@ -140,7 +136,7 @@ class ZMQBridge(ru.zmq.Server): self._lock = mt.Lock() self._clients = dict() - ru.zmq.Server.__init__(self) + ru.zmq.Server.__init__(self, url='tcp://*:10000+') self._monitor = mt.Thread(target=self._monitor) self._monitor.daemon = True @@ -209,7 +205,7 @@ class ZMQBridge(ru.zmq.Server): sid = arg['sid'] if sid in self._clients: - return {'err': 'client already registered'} + raise RuntimeError('client already registered') q = mp.Queue() term = mp.Event() @@ -220,14 +216,14 @@ class ZMQBridge(ru.zmq.Server): data = q.get(timeout=10) except queue.Empty: proc.terminate() - return {'err': 'worker startup failed'} + raise RuntimeError('worker startup failed') self._clients[sid] = {'proc': proc, 'term': term, 'data': data, 'hb' : time.time()} - return {'res': self._clients[sid]['data']} + return self._clients[sid]['data'] # -------------------------------------------------------------------------- @@ -236,36 +232,36 @@ class ZMQBridge(ru.zmq.Server): log = ru.Logger('radical.pilot.bridge', level='debug', path=sid) - control_pubsub = None - state_pubsub = None - agent_queue = None + proxy_cp = None + proxy_sp = None + proxy_aq = None try: - control_pubsub = ru.zmq.PubSub(cfg={'channel': 'control_pubsub', - 'uid' : 'control_pubsub', - 'type' : 'pubsub', - 'path' : sid}) - - state_pubsub = ru.zmq.PubSub(cfg={'channel': 'state_pubsub', - 'uid' : 
'state_pubsub', - 'type' : 'pubsub', - 'path' : sid}) - - agent_queue = ru.zmq.Queue (cfg={'channel': 'agent_queue', - 'uid' : 'agent_queue', - 'type' : 'queue', - 'path' : sid}) - - control_pubsub.start() - state_pubsub.start() - agent_queue.start() - - data = {'control_pubsub' : {'pub': str(control_pubsub.addr_pub), - 'sub': str(control_pubsub.addr_sub)}, - 'state_pubsub' : {'pub': str(state_pubsub.addr_pub), - 'sub': str(state_pubsub.addr_sub)}, - 'agent_queue' : {'put': str(agent_queue._addr_put), - 'get': str(agent_queue._addr_get)}} + proxy_cp = ru.zmq.PubSub(cfg={'channel': 'proxy_control_pubsub', + 'uid' : 'proxy_control_pubsub', + 'type' : 'pubsub', + 'path' : sid}) + + proxy_sp = ru.zmq.PubSub(cfg={'channel': 'proxy_state_pubsub', + 'uid' : 'proxy_state_pubsub', + 'type' : 'pubsub', + 'path' : sid}) + + proxy_aq = ru.zmq.Queue (cfg={'channel': 'proxy_task_queue', + 'uid' : 'proxy_task_queue', + 'type' : 'queue', + 'path' : sid}) + + proxy_cp.start() + proxy_sp.start() + proxy_aq.start() + + data = {'proxy_control_pubsub': {'pub': str(proxy_cp.addr_pub), + 'sub': str(proxy_cp.addr_sub)}, + 'proxy_state_pubsub' : {'pub': str(proxy_sp.addr_pub), + 'sub': str(proxy_sp.addr_sub)}, + 'proxy_task_queue' : {'put': str(proxy_aq.addr_put), + 'get': str(proxy_aq.addr_get)}} # inform service about endpoint details q.put(data) @@ -280,9 +276,9 @@ class ZMQBridge(ru.zmq.Server): finally: - if control_pubsub: control_pubsub.stop() - if state_pubsub : state_pubsub.stop() - if agent_queue : agent_queue.stop() + if proxy_cp: proxy_cp.stop() + if proxy_sp: proxy_sp.stop() + if proxy_aq: proxy_aq.stop() log.info('terminated') @@ -295,9 +291,9 @@ class ZMQBridge(ru.zmq.Server): with self._lock: if sid not in self._clients: - return {'err': 'client not registered'} + raise RuntimeError('client %s not registered' % sid) - return {'res': self._clients[sid]['data']} + return self._clients[sid]['data'] # -------------------------------------------------------------------------- @@ -309,15 +305,13 @@ class ZMQBridge(ru.zmq.Server): with self._lock: if sid not in self._clients: - return {'err': 'client not registered'} + raise RuntimeError('client %s not registered' % sid) self._clients[sid]['term'].set() self._clients[sid]['proc'].join() del(self._clients[sid]) - return {'res': 'ok'} - # -------------------------------------------------------------------------- # @@ -329,12 +323,10 @@ class ZMQBridge(ru.zmq.Server): with self._lock: if sid not in self._clients: - return {'err': 'client not registered'} + raise RuntimeError('client %s not registered' % sid) self._clients[sid]['hb'] = now - return {'res': 'ok'} - # ------------------------------------------------------------------------------ # diff --git a/examples/01_task_details.py b/examples/01_task_details.py index c57a02eff4..804808a08a 100755 --- a/examples/01_task_details.py +++ b/examples/01_task_details.py @@ -58,12 +58,12 @@ # Define an [n]-core local pilot that runs for [x] minutes # Here we use a dict to initialize the description object pd_init = {'resource' : resource, - 'runtime' : 15, # pilot runtime (min) + 'runtime' : 300, 'exit_on_error' : True, 'project' : config[resource].get('project', None), 'queue' : config[resource].get('queue', None), 'access_schema' : config[resource].get('schema', None), - 'cores' : 1024 * 2, + 'cores' : 1024 * 16, 'gpus' : config[resource].get('gpus', 0), } pdesc = rp.PilotDescription(pd_init) @@ -78,7 +78,7 @@ # Create a workload of tasks. # Each task runs '/bin/date'. 
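Client side, the renamed proxy keeps the request/reply protocol, but the
handlers now return their payload directly and raise on error, instead of
wrapping results into `{'res': ...}` / `{'err': ...}` dicts.  A usage sketch
with an illustrative proxy address:

import radical.utils as ru

proxy = ru.zmq.Client(url='tcp://localhost:10000')
ep    = proxy.request('client_register', {'sid': 'rp.session.0000'})

# `ep` holds the endpoints created by the worker above:
#
#   ep['proxy_control_pubsub']['pub' / 'sub']
#   ep['proxy_state_pubsub'  ]['pub' / 'sub']
#   ep['proxy_task_queue'    ]['put' / 'get']

proxy.request('client_unregister', {'sid': 'rp.session.0000'})
proxy.close()
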
- n = 64 * 1024 # number of tasks to run + n = 1024 * 1024 # number of tasks to run report.info('create %d task description(s)\n' % n) tds = list() @@ -89,6 +89,7 @@ # Here we don't use dict initialization. td = rp.TaskDescription() td.executable = '/bin/date' + td.sandbox = 'task_sandbox' tds.append(td) report.progress() @@ -104,7 +105,7 @@ tmgr.wait_tasks() report.info('\n') - for task in tasks: + for task in tasks[:10]: report.plain(' * %s: %s, exit: %3s, out: %s' % (task.uid, task.state[:4], task.exit_code, task.stdout)) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 89268fc749..136e15ca9b 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -18,7 +18,6 @@ from .. import utils as rpu from .. import states as rps from .. import constants as rpc -from ..db import DBSession from .resource_manager import ResourceManager from .launch_method import LaunchMethod @@ -33,12 +32,11 @@ class Agent_0(rpu.Worker): the sub-agents die, it will shut down the other sub-agents and itself. This class inherits the rpu.Worker, so that it can use its communication - bridges and callback mechanisms. Specifically, it will pull the DB for - new tasks to be exexuted and forwards them to the agent's component - network (see `work()`). It will also watch the DB for any commands to be - forwarded (pilot termination, task cancelation, etc), and will take care - of heartbeat messages to be sent to the client module. To do all this, it - initializes a DB connection in `initialize()`. + bridges and callback mechanisms. Specifically, it will pull the tasks from + the proxy comm channels and forwards them to the agent's component network + (see `work()`). It will also watch the proxy pubsub for any commands to be + enacted or forwarded (pilot termination, task cancelation, etc), and will + take care of heartbeat messages to be sent to the client. ''' # -------------------------------------------------------------------------- @@ -50,11 +48,15 @@ def __init__(self, cfg, session): self._pmgr = cfg.pmgr self._pwd = cfg.pilot_sandbox self._session = session + self._sid = self._session.uid self._log = session._log self._starttime = time.time() self._final_cause = None + # pick up proxy config from session + self._cfg.proxy = self._session._cfg.proxy + rpu.Worker.__init__(self, self._cfg, session) # this is the earliest point to sync bootstrap and agent profiles @@ -76,19 +78,14 @@ def __init__(self, cfg, session): self._cmgr.start_bridges() self._cmgr.start_components() - # connect to client communication channels, maybe - self._connect_client() - - # connect to MongoDB for state push/pull - self._connect_db() + # connect to proxy communication channels, maybe + self._connect_proxy() # create the sub-agent configs and start the sub agents self._write_sa_configs() self._start_sub_agents() # TODO: move to cmgr? - # at this point the session is up and connected, and it should have - # brought up all communication bridges and components. We are - # ready to rumble! 
+ # handle control messages self.register_subscriber(rpc.CONTROL_PUBSUB, self._check_control) # run our own slow-paced heartbeat monitor to watch pmgr heartbeats @@ -108,6 +105,21 @@ def __init__(self, cfg, session): self._log.info('hb init for %s', self._pmgr) self._hb.beat(uid=self._pmgr) + # register the control callback + self.register_subscriber(rpc.PROXY_CONTROL_PUBSUB, + self._proxy_control_cb) + + # proxy state updates + self.register_publisher(rpc.PROXY_STATE_PUBSUB) + self.register_subscriber(rpc.STATE_PUBSUB, self._proxy_state_cb) + + # regularly check for lifetime limit + self.register_timed_cb(self._check_lifetime, timer=10) + + # as long as we are alive, we also want to keep the proxy alive + self._session._run_proxy_hb() + + # -------------------------------------------------------------------------- # @@ -128,150 +140,76 @@ def _hb_term_cb(self, msg=None): # -------------------------------------------------------------------------- # - def _connect_db(self): + def _connect_proxy(self): - # Check for the RADICAL_PILOT_DB_HOSTPORT env var, which will hold - # the address of the tunnelized DB endpoint. If it exists, we - # overrule the agent config with it. - hostport = os.environ.get('RADICAL_PILOT_DB_HOSTPORT') - if hostport: - host, port = hostport.split(':', 1) - dburl = ru.Url(self._cfg.dburl) - dburl.host = host - dburl.port = port - self._cfg.dburl = str(dburl) + # write config files for proxy channels + for p in self._cfg.proxy: + ru.write_json('%s.cfg' % p, self._cfg.proxy[p]) - self._dbs = DBSession(sid=self._cfg.sid, dburl=self._cfg.dburl, - cfg=self._cfg, log=self._log) + # listen for new tasks from the client + self.register_input(rps.AGENT_STAGING_INPUT_PENDING, + rpc.PROXY_TASK_QUEUE, + qname=self._pid, + cb=self._proxy_input_cb) + # and forward to agent input staging + self.register_output(rps.AGENT_STAGING_INPUT_PENDING, + rpc.AGENT_STAGING_INPUT_QUEUE) - # -------------------------------------------------------------------------- - # - def _connect_client(self): - - # when running on the same host as the client, we may be able to bypass - # MongoDB and instead connect to the client's ZMQ communication - # channels. 
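The pilot manager heartbeat handling follows a plain watchdog pattern:
`self._hb.beat(uid)` records liveness, and `ru.Heartbeat` calls the
registered `term_cb` once no beat arrived within the timeout.  Reduced to a
generic sketch (assumed names; the real implementation lives in
`ru.Heartbeat`):

import time
import threading


class Watchdog:

    def __init__(self, timeout, term_cb):

        self._timeout = timeout
        self._term_cb = term_cb
        self._beats   = dict()
        self._thread  = threading.Thread(target=self._watch, daemon=True)
        self._thread.start()

    def beat(self, uid):

        # record a liveness signal for `uid`
        self._beats[uid] = time.time()

    def _watch(self):

        while True:
            time.sleep(1)
            now = time.time()
            for uid, last in list(self._beats.items()):
                if now - last > self._timeout:
                    self._term_cb()      # no beat in time: give up
                    return
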
- # - cfg = self._cfg.client_comm - - # connect to the client's scheduler pubsub (to get tasks) - self._client_input = ru.zmq.Subscriber( - channel='agent_staging_input_pubsub', - url=cfg.agent_staging_input_pubsub.sub, - topic=self._pid, - cb=self._client_input_cb, - log=self._log, - prof=self._prof) - self._client_input.subscribe(self._pid) - - # completed tasks are fed back to the tmgr staging output queue - self._client_output = ru.zmq.Putter(rpc.TMGR_STAGING_OUTPUT_QUEUE, - url=cfg.tmgr_staging_output_queue.put) - - # and listen for completed tasks to foward to client + # listen for completed tasks to foward to client self.register_input(rps.TMGR_STAGING_OUTPUT_PENDING, rpc.AGENT_COLLECTING_QUEUE, - self._agent_collect_cb) - - # and control pubsub (to register) - self._client_ctrl_pub = ru.zmq.Publisher(channel=rpc.CONTROL_PUBSUB, - url=cfg.control_pubsub.pub, - log=self._log, - prof=self._prof) - - # and control pubsub (to register) - self._client_ctrl_sub = ru.zmq.Subscriber(channel=rpc.CONTROL_PUBSUB, - url=cfg.control_pubsub.sub, - log=self._log, - prof=self._prof, - topic=rpc.CONTROL_PUBSUB) - - # allow control pubsub to connect - time.sleep(1) - - # channels are set up, register client - self._client_ctrl_pub.put(rpc.CONTROL_PUBSUB, - msg={'cmd': 'pilot_register', - 'arg': {'pid': self._pid}}) - self._log.debug('pilot registered') - - # and wait for at most 30 seconds for acknoweldgement - start = time.time() - self._registered = False - while True: - - tout = 30 - (time.time() - start) - if tout < 0: - break - - topic, msg = self._client_ctrl_sub.get_nowait(timeout=tout) - if not msg: - continue - - cmd = msg['cmd'] - if cmd != 'pilot_register_ok': - continue - - pid = msg['arg']['pid'] - if pid == self._pid: - self._registered = True - break - - self._log.debug('registered: %s', self._registered) + self._proxy_output_cb) + # and register output + self.register_output(rps.TMGR_STAGING_OUTPUT_PENDING, + rpc.PROXY_TASK_QUEUE) + # FIXME: register pubsubs # -------------------------------------------------------------------------- # - def _client_input_cb(self, topic, msg): + def _proxy_input_cb(self, msg): + + self._log.debug('=== proxy input cb: %s', len(msg)) - self._log.debug('=== input cb %s: %s', topic, len(msg)) + to_advance = list() for task in msg: # make sure the tasks obtain env settings (if needed) if 'task_environment' in self._cfg: + if not task['description'].get('environment'): task['description']['environment'] = dict() + for k,v in self._cfg['task_environment'].items(): + # FIXME: this might overwrite user specified env task['description']['environment'][k] = v - # we need to make sure to have the correct state: - # task['state'] = rps._task_state_collapse(task['states']) - # self._prof.prof('get', uid=task['uid']) - # FIXME: raise or fail task! if task['state'] != rps.AGENT_STAGING_INPUT_PENDING: self._log.error('invalid state: %s:%s:%s', task['uid'], task['state'], task.get('states')) + continue - task['control'] = 'agent' + to_advance.append(task) - # now we really own the CUs, and can start working on them (ie. push + # now we really own the tasks and can start working on them (ie. push # them into the pipeline). We don't publish nor profile as advance, - # since that happened already on the module side when the state was set. - self.advance(msg, publish=False, push=True) + # since the state transition happened already on the client side when + # the state was set. 
+ self.advance(to_advance, publish=False, push=True) # -------------------------------------------------------------------------- # - def _agent_collect_cb(self, msg): - - self._log.debug('=== collect cb: %s', len(msg)) - - if self._client_output: - self._log.debug('=== to client: %s', len(msg)) - self._client_output.put(msg) - - else: - self._log.debug('=== to MongoDB: %s', len(msg)) - for task in msg: - task['$all'] = True - task['control'] = 'tmgr_pending' - self.advance(msg, publish=True, push=False) + def _proxy_output_cb(self, msg): + # we just forward the tasks to the task proxy queue + self._log.debug('=== proxy output cb: %s', len(msg)) + self.advance(msg, publish=False, push=True, qname=self._sid) # -------------------------------------------------------------------------- @@ -333,40 +271,19 @@ def _configure_app_comm(self): # def initialize(self): - # registers the staging_input_queue as this is what we want to push - # tasks to - self.register_output(rps.AGENT_STAGING_INPUT_PENDING, - rpc.AGENT_STAGING_INPUT_QUEUE) - - # register the command callback which pulls the DB for commands - self.register_timed_cb(self._agent_command_cb, - timer=self._cfg['db_poll_sleeptime']) - - # # start ingest thread to pull in tasks - # self._ingest_thread = mt.Thread(target=self._ingest) - # self._ingest_thread.daemon = True - # self._ingest_thread.start() - # sub-agents are started, components are started, bridges are up: we are - # ready to roll! Send state update via client control pubsub + # ready to roll! Send state update rm_info = self._rm.rm_info n_nodes = len(rm_info['node_list']) - pilot = { - 'type' : 'pilot', - 'uid' : self._pid, - 'state' : rps.PMGR_ACTIVE, - 'resources': {'rm_info': rm_info, - 'cpu' : rm_info['cores_per_node'] * n_nodes, - 'gpu' : rm_info['gpus_per_node'] * n_nodes}} - - self._client_ctrl_pub.put(topic=rpc.CONTROL_PUBSUB, - msg={'cmd': 'pilot_activate', - 'arg': {'pilot': pilot}}) - - # also update via MongoDB as fallback - self.advance(pilot, publish=True, push=False) - self._log.debug('=== sent pilot_activate') + pilot = {'type' : 'pilot', + 'uid' : self._pid, + 'state' : rps.PMGR_ACTIVE, + 'resources': {'rm_info': rm_info, + 'cpu' : rm_info['cores_per_node'] * n_nodes, + 'gpu' : rm_info['gpus_per_node'] * n_nodes}} + + self.advance(pilot, publish=True, push=False) # -------------------------------------------------------------------------- @@ -409,6 +326,17 @@ def finalize(self): if self._rm: self._rm.stop() + self._log.info('rusage: %s', rpu.get_rusage()) + + out, err, log = '', '', '' + + try : out = open('./agent.0.out', 'r').read(1024) + except: pass + try : err = open('./agent.0.err', 'r').read(1024) + except: pass + try : log = open('./agent.0.log', 'r').read(1024) + except: pass + if self._final_cause == 'timeout' : state = rps.DONE elif self._final_cause == 'cancel' : state = rps.CANCELED elif self._final_cause == 'sys.exit' : state = rps.CANCELED @@ -419,29 +347,17 @@ def finalize(self): with open('./killme.signal', 'w') as fout: fout.write('%s\n' % state) - # we don't rely on the existence / viability of the update worker at - # that point. 
- self._log.debug('update db state: %s: %s', state, self._final_cause) - self._log.info('rusage: %s', rpu.get_rusage()) - - out, err, log = '', '', '' - - try : out = open('./agent.0.out', 'r').read(1024) - except: pass - try : err = open('./agent.0.err', 'r').read(1024) - except: pass - try : log = open('./agent.0.log', 'r').read(1024) - except: pass + pilot = {'type' : 'pilot', + 'uid' : self._pid, + 'stdout' : out, + 'stderr' : err, + 'logfile': log, + 'state' : state} - ret = self._dbs._c.update({'type' : 'pilot', - 'uid' : self._pid}, - {'$set' : {'stdout' : rpu.tail(out), - 'stderr' : rpu.tail(err), - 'logfile': rpu.tail(log), - 'state' : state}, - '$push': {'states' : state} - }) - self._log.debug('update ret: %s', ret) + self._log.debug('=== push final state update') + self._log.debug('update state: %s: %s', state, self._final_cause) + self.publish(rpc.PROXY_STATE_PUBSUB, + topic=rpc.STATE_PUBSUB, msg=[pilot]) # -------------------------------------------------------------------- @@ -622,138 +538,80 @@ def run(self): # -------------------------------------------------------------------------- # - def _agent_command_cb(self): + def _check_lifetime(self): - if not self._check_commands(): return False - if not self._check_rpc (): return False - if not self._check_state (): return False + # Make sure that we haven't exceeded the runtime - otherwise terminate. + if self._cfg.runtime: + + if time.time() >= self._starttime + (int(self._cfg.runtime) * 60): + + self._log.info('runtime limit (%ss).', self._cfg.runtime * 60) + self._final_cause = 'timeout' + self.stop() + return False # we are done return True # -------------------------------------------------------------------------- # - def _check_commands(self): - - # Check if there's a command waiting - # FIXME: this pull should be done by the update worker, and commands - # should then be communicated over the command pubsub - # FIXME: commands go to pmgr, tmgr, session docs - # FIXME: check if pull/wipe are atomic - # FIXME: long runnign commands can time out on hb - retdoc = self._dbs._c.find_and_modify( - query ={'uid' : self._pid}, - fields=['cmds'], # get new commands - update={'$set': {'cmds': list()}}) # wipe old commands - - if not retdoc: - return True - - for spec in retdoc.get('cmds', []): + def _proxy_state_cb(self, topic, msg): - cmd = spec['cmd'] - arg = spec['arg'] + # no need to check - blindly forward all messages to the proxy + self.publish(rpc.PROXY_STATE_PUBSUB, topic=topic, msg=msg) - self._log.debug('pilot command: %s: %s', cmd, arg) - self._prof.prof('cmd', msg="%s : %s" % (cmd, arg), uid=self._pid) - - if cmd == 'heartbeat' and arg['pmgr'] == self._pmgr: - self._hb.beat(uid=self._pmgr) - elif cmd == 'prep_env': - env_spec = arg - - for env_id in env_spec: - # ensure we have a hb period - self._hb.beat(uid=self._pmgr) - self._prepare_env(env_id, env_spec[env_id]) - - elif cmd == 'cancel_pilot': - self._log.info('cancel pilot cmd') - self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'terminate', - 'arg' : None}) - self._final_cause = 'cancel' - self.stop() - - return False # we are done - - elif cmd == 'cancel_tasks': - self._log.info('cancel_tasks cmd') - self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'cancel_tasks', - 'arg' : arg}) - else: - self._log.warn('could not interpret cmd "%s" - ignore', cmd) + # -------------------------------------------------------------------------- + # + def _proxy_control_cb(self, topic, msg): - return True + self._log.debug('=== proxy control: %s', msg) - # 
-------------------------------------------------------------------------- - # - def _check_rpc(self): - ''' - check if the DB has any RPC request for this pilot. If so, then forward - that request as `rpc_req` command on the CONTROL channel, and listen for - an `rpc_res` command on the same channel, for the same rpc id. Once - that response is received (from whatever component handled that - command), send the response back to the databse for the callee to pick - up. - ''' + cmd = msg['cmd'] + arg = msg['arg'] - # FIXME: implement a timeout, and/or a registry of rpc clients + self._log.debug('pilot command: %s: %s', cmd, arg) + self._prof.prof('cmd', msg="%s : %s" % (cmd, arg), uid=self._pid) - retdoc = self._dbs._c.find_and_modify( - query ={'uid' : self._pid}, - fields=['rpc_req'], - update={'$set': {'rpc_req': None}}) - if not retdoc: - # no rpc request found - return True + if cmd == 'pmgr_heartbeat' and arg['pmgr'] == self._pmgr: - rpc_req = retdoc.get('rpc_req') - if rpc_req is None: - # document has no rpc request + self._hb.beat(uid=self._pmgr) return True - self._log.debug('rpc req: %s', rpc_req) - # RPCs are synchronous right now - we send the RPC on the command - # channel, hope that some component picks it up and replies, and then - # return that reply. The reply is received via a temporary callback - # defined here, which will receive all CONTROL messages until the right - # rpc response comes along. - def rpc_cb(topic, msg): + if cmd == 'prep_env': - rpc_id = rpc_req['uid'] + env_spec = arg + for env_id in env_spec: + # ensure we have a hb period + self._hb.beat(uid=self._pmgr) + self._prepare_env(env_id, env_spec[env_id]) + return True - cmd = msg['cmd'] - rpc_res = msg['arg'] - if cmd != 'rpc_res': - # not an rpc responese - return True + if cmd == 'cancel_pilots': - if rpc_res['uid'] != rpc_id: - # not the right rpc response + if self._pid not in arg.get('uids'): + self._log.debug('=== ignore cancel %s', msg) return True - # send the response to the DB - self._dbs._c.update({'type' : 'pilot', - 'uid' : self._pid}, - {'$set' : {'rpc_res': rpc_res}}) - - # work is done - unregister this temporary cb (rpc_cb) - return False + self._log.info('=== cancel pilot cmd') + self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'terminate', + 'arg' : None}) + self._final_cause = 'cancel' + self.stop() + return False # we are done - self.register_subscriber(rpc.CONTROL_PUBSUB, rpc_cb) - # ready to receive and proxy rpc response -- forward rpc request on - # control channel - self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'rpc_req', - 'arg' : rpc_req}) + # all other messages (such as cancel_tasks) are forwarded to the agent + # control pubsub, to be picked up by the respective target components + self._log.debug('=== fwd control msg %s', msg) + self.publish(rpc.CONTROL_PUBSUB, msg) - return True # keeb cb registered (self._check_rpc) + return True # -------------------------------------------------------------------------- @@ -806,88 +664,6 @@ def _check_control(self, _, msg): return True - # -------------------------------------------------------------------------- - # - def _check_state(self): - - # Make sure that we haven't exceeded the runtime - otherwise terminate. 
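# ------------------------------------------------------------------------------
# The dispatch pattern of `_proxy_control_cb` above, reduced to a sketch:
# commands owned by this component are handled locally, everything else is
# forwarded, and only a shutdown command returns False (all names here are
# illustrative stand-ins):

def dispatch(msg, handlers, forward):

    cmd = msg.get('cmd')

    if cmd in handlers:
        return handlers[cmd](msg.get('arg'))

    forward(msg)          # e.g., re-publish on the agent's control pubsub
    return True           # keep the callback registered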
- if self._cfg.runtime: - - if time.time() >= self._starttime + (int(self._cfg.runtime) * 60): - - self._log.info('runtime limit (%ss).', self._cfg.runtime * 60) - self._final_cause = 'timeout' - self.stop() - return False # we are done - - return True - - - # -------------------------------------------------------------------------- - # - def _ingest(self): - - while not self._term.is_set(): - self._check_tasks_cb() - - - # -------------------------------------------------------------------------- - # - def _check_tasks_cb(self): - - # Check for tasks waiting for input staging and log pull. - # - # FIXME: Unfortunately, 'find_and_modify' is not bulkable, so we have - # to use 'find'. To avoid finding the same tasks over and over - # again, we update the 'control' field *before* running the next - # find -- so we do it right here. - # This also blocks us from using multiple ingest threads, or from - # doing late binding by task pull :/ - task_cursor = self._dbs._c.find({'type' : 'task', - 'pilot' : self._pid, - 'control' : 'agent_pending'}) - if not task_cursor.count(): - # self._log.info('tasks pulled: 0') - time.sleep(self._cfg['db_poll_sleeptime']) - return - - # update the tasks to avoid pulling them again next time. - task_list = list(task_cursor) - task_uids = [task['uid'] for task in task_list] - - self._dbs._c.update({'type' : 'task', - 'uid' : {'$in' : task_uids}}, - {'$set' : {'control' : 'agent'}}, - multi=True) - - self._log.info("=== tasks pulled from db: %4d", len(task_list)) - self._prof.prof('get', msg='bulk: %d' % len(task_list), uid=self._pid) - - for task in task_list: - - # make sure the tasks obtain env settings (if needed) - if 'task_environment' in self._cfg: - if not task['description'].get('environment'): - task['description']['environment'] = dict() - for k,v in self._cfg['task_environment'].items(): - task['description']['environment'][k] = v - - # we need to make sure to have the correct state: - task['state'] = rps._task_state_collapse(task['states']) - self._prof.prof('get', uid=task['uid']) - - # FIXME: raise or fail task! - if task['state'] != rps.AGENT_STAGING_INPUT_PENDING: - self._log.error('invalid state: %s', (pprint.pformat(task))) - - task['control'] = 'agent' - - # now we really own the CUs, and can start working on them (ie. push - # them into the pipeline). We don't publish nor profile as advance, - # since that happened already on the module side when the state was set. - self.advance(task_list, publish=False, push=True) - - # -------------------------------------------------------------------------- # def _prepare_env(self, eid, env_spec): diff --git a/src/radical/pilot/agent/bootstrap_0.sh b/src/radical/pilot/agent/bootstrap_0.sh index b7e0238e0b..f4528b5106 100755 --- a/src/radical/pilot/agent/bootstrap_0.sh +++ b/src/radical/pilot/agent/bootstrap_0.sh @@ -1793,10 +1793,6 @@ export RADICAL_VERBOSE=DEBUG export RADICAL_UTIL_VERBOSE=DEBUG export RADICAL_PILOT_VERBOSE=DEBUG -# the agent will *always* use the dburl from the config file, not from the env -# FIXME: can we better define preference in the session ctor? 
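# ------------------------------------------------------------------------------
# The lifetime guard kept in `_check_lifetime` boils down to one comparison;
# a minimal sketch, assuming the runtime is configured in minutes as above:

import time

def past_deadline(starttime, runtime_min):

    return time.time() >= starttime + int(runtime_min) * 60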
-unset RADICAL_PILOT_DBURL - # avoid ntphost lookups on compute nodes export RADICAL_PILOT_NTPHOST=$RADICAL_PILOT_NTPHOST @@ -2039,13 +2035,6 @@ then final_state='FAILED' fi -echo "# -------------------------------------------------------------------" -echo "# push final pilot state: $SESSION_ID $PILOT_ID $final_state" -sp=$(which radical-pilot-agent-statepush) -test -z "$sp" && echo "statepush not found" -test -z "$sp" || $PYTHON "$sp" agent.0.cfg "$final_state" - -echo echo "# -------------------------------------------------------------------" echo "#" echo "# Done, exiting ($AGENT_EXITCODE)" diff --git a/src/radical/pilot/agent/staging_output/default.py b/src/radical/pilot/agent/staging_output/default.py index 905e58d5cb..5efa954f41 100644 --- a/src/radical/pilot/agent/staging_output/default.py +++ b/src/radical/pilot/agent/staging_output/default.py @@ -107,6 +107,7 @@ def work(self, tasks): no_staging_tasks.append(task) if no_staging_tasks: + self._log.debug('=== push %s tasks', len(no_staging_tasks)) self.advance(no_staging_tasks, publish=True, push=True) for task,actionables in staging_tasks: diff --git a/src/radical/pilot/configs/session_default.json b/src/radical/pilot/configs/session_default.json index bb2a4ca985..ab60efdafa 100644 --- a/src/radical/pilot/configs/session_default.json +++ b/src/radical/pilot/configs/session_default.json @@ -3,7 +3,6 @@ # specified. It contains the minimal set of settings required for # a functional rp session, both on the client and on the agent side. { - "default_dburl": "mongodb://rct:rct_test@two.radical-project.org/rct_test", "service_url" : "${RADICAL_PILOT_SERVICE_URL}", "session_base" : "${RADICAL_PILOT_SESSION_BASE:$PWD}", "record" : "${RADICAL_PILOT_SESSION_RECORD}", diff --git a/src/radical/pilot/constants.py b/src/radical/pilot/constants.py index 59aef32bc4..07d49305a1 100644 --- a/src/radical/pilot/constants.py +++ b/src/radical/pilot/constants.py @@ -43,6 +43,10 @@ STATE_PUBSUB = 'state_pubsub' LOG_PUBSUB = 'log_pubsub' +PROXY_CONTROL_PUBSUB = 'proxy_control_pubsub' +PROXY_STATE_PUBSUB = 'proxy_state_pubsub' +PROXY_TASK_QUEUE = 'proxy_task_queue' + # ------------------------------------------------------------------------------ # diff --git a/src/radical/pilot/pilot.py b/src/radical/pilot/pilot.py index 4166cb4f35..d9b588bd66 100644 --- a/src/radical/pilot/pilot.py +++ b/src/radical/pilot/pilot.py @@ -630,7 +630,9 @@ def rpc(self, rpc, args): This is basically an RPC into the pilot. 
''' - reply = self._session._dbs.pilot_rpc(self.uid, rpc, args) + # FIXME: MongoDB + reply = None + # reply = self._session._dbs.pilot_rpc(self.uid, rpc, args) return reply diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py index 38886e14aa..567187a58b 100644 --- a/src/radical/pilot/pilot_manager.py +++ b/src/radical/pilot/pilot_manager.py @@ -38,7 +38,7 @@ class PilotManager(rpu.Component): **Example**:: - s = rp.Session(database_url=DBURL) + s = rp.Session() pm = rp.PilotManager(session=s) @@ -96,7 +96,6 @@ def __init__(self, session, cfg='default'): self._pcb_lock = ru.RLock('pmgr.pcb_lock') self._terminate = mt.Event() self._closed = False - self._rec_id = 0 # used for session recording self._uid = ru.generate_id('pmgr.%(item_counter)04d', ru.ID_CUSTOM, ns=session.uid) @@ -118,7 +117,6 @@ def __init__(self, session, cfg='default'): cfg.sid = session.uid cfg.base = session.base cfg.path = session.path - cfg.dburl = session.dburl cfg.heartbeat = session.cfg.heartbeat rpu.Component.__init__(self, cfg, session=session) @@ -149,8 +147,6 @@ def __init__(self, session, cfg='default'): # register the state notification pull cb and hb pull cb # FIXME: we may want to have the frequency configurable # FIXME: this should be a tailing cursor in the update worker - self.register_timed_cb(self._state_pull_cb, - timer=self._cfg['db_poll_sleeptime']) self.register_timed_cb(self._pilot_heartbeat_cb, timer=self._cfg['db_poll_sleeptime']) @@ -269,32 +265,6 @@ def _pilot_heartbeat_cb(self): return True - # -------------------------------------------------------------------------- - # - def _state_pull_cb(self): - - if self._terminate.is_set(): - return False - - # pull all pilot states from the DB, and compare to the states we know - # about. If any state changed, update the known pilot instances and - # push an update message to the state pubsub. - # pubsub. - # FIXME: we also pull for dead pilots. That is not efficient... 
- # FIXME: this needs to be converted into a tailed cursor in the update - # worker - # FIXME: this is a big and frequently invoked lock - pilot_dicts = self._session._dbs.get_pilots(pmgr_uid=self.uid) - - - for pilot_dict in pilot_dicts: - self._log.debug('state pulled: %s: %s', pilot_dict['uid'], - pilot_dict['state']) - self._update_pilot(pilot_dict, publish=True) - - return True - - # -------------------------------------------------------------------------- # def _state_sub_cb(self, topic, msg): @@ -303,7 +273,7 @@ def _state_sub_cb(self, topic, msg): return False - self._log.debug('state event: %s', msg) + # self._log.debug('state event: %s', msg) cmd = msg.get('cmd') arg = msg.get('arg') @@ -425,7 +395,8 @@ def _call_pilot_callbacks(self, pilot): # def _pilot_send_hb(self, pid=None): - self._session._dbs.pilot_command('heartbeat', {'pmgr': self._uid}, pid) + self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'pmgr_heartbeat', + 'arg' : {'pmgr' : self.uid}}) # -------------------------------------------------------------------------- @@ -435,7 +406,8 @@ def _pilot_prepare_env(self, pid, env_spec): if not env_spec: return - self._session._dbs.pilot_command('prep_env', env_spec, [pid]) + # FIXME: MongoDB + # self._session._dbs.pilot_command('prep_env', env_spec, [pid]) # -------------------------------------------------------------------------- @@ -612,10 +584,6 @@ def submit_pilots(self, descriptions): with self._pilots_lock: self._pilots[pilot.uid] = pilot - if self._session._rec: - ru.write_json(pd.as_dict(), "%s/%s.batch.%03d.json" - % (self._session._rec, pilot.uid, self._rec_id)) - self._rep.plain('\n\t%s %-20s %6d cores %6d gpus' % (pilot.uid, pd['resource'], pd.get('cores', 0), pd.get('gpus', 0))) @@ -628,12 +596,6 @@ def submit_pilots(self, descriptions): # only trigger the profile entry for NEW. self.advance(pilot_docs, state=rps.NEW, publish=False, push=False) - if self._session._rec: - self._rec_id += 1 - - # insert pilots into the database, as a bulk. - self._session._dbs.insert_pilots(pilot_docs) - # immediately send first heartbeat and any other commands which are # included in the pilot description for pilot_doc in pilot_docs: @@ -841,11 +803,7 @@ def cancel_pilots(self, uids=None, _timeout=None): self._log.debug('pilot(s).need(s) cancellation %s', uids) # send the cancelation request to the pilots - # FIXME: the cancellation request should not go directly to the DB, but - # through the DB abstraction layer... 
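# ------------------------------------------------------------------------------
# Both halves of the heartbeat exchange after the MongoDB removal, as a
# sketch: the pmgr publishes `pmgr_heartbeat` on the control pubsub (see
# `_pilot_send_hb` above), and agent_0 beats its local monitor when the pmgr
# uid matches. `publish` and `hb` stand in for the real pubsub publisher and
# `ru.Heartbeat` instance.

def pmgr_send_hb(publish, pmgr_uid):

    publish('control_pubsub', {'cmd': 'pmgr_heartbeat',
                               'arg': {'pmgr': pmgr_uid}})

def pilot_recv_hb(msg, pmgr_uid, hb):

    if msg['cmd'] == 'pmgr_heartbeat' and msg['arg']['pmgr'] == pmgr_uid:
        hb.beat(uid=pmgr_uid)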
- self._session._dbs.pilot_command('cancel_pilot', [], uids) - - # inform pmgr.launcher - it will force-kill the pilot after some delay + # the pmgr.launcher will also force-kill the pilot after some delay self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'cancel_pilots', 'arg' : {'pmgr' : self.uid, 'uids' : uids}}) diff --git a/src/radical/pilot/pmgr/launching/default.py b/src/radical/pilot/pmgr/launching/default.py index ee10c5a42a..b5410decac 100644 --- a/src/radical/pilot/pmgr/launching/default.py +++ b/src/radical/pilot/pmgr/launching/default.py @@ -565,6 +565,7 @@ def _start_pilot_bulk(self, resource, schema, pilots): out, err, ret = ru.sh_callout(cmd, shell=True) if ret: + self._log.debug('cmd: %s', cmd) self._log.debug('out: %s', out) self._log.debug('err: %s', err) raise RuntimeError('callout failed: %s' % cmd) @@ -654,8 +655,8 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): # ---------------------------------------------------------------------- # Database connection parameters - sid = self._session.uid - database_url = self._session.cfg.dburl + sid = self._session.uid + service_url = self._session.cfg.service_url # some default values are determined at runtime default_virtenv = '%%(resource_sandbox)s/ve.%s.%s' % \ @@ -678,9 +679,9 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): # ---------------------------------------------------------------------- # get parameters from resource cfg, set defaults where needed agent_launch_method = rcfg.get('agent_launch_method') - agent_dburl = rcfg.get('agent_mongodb_endpoint', database_url) - agent_spawner = rcfg.get('agent_spawner', DEFAULT_AGENT_SPAWNER) - agent_config = rcfg.get('agent_config', DEFAULT_AGENT_CONFIG) + agent_service_url = rcfg.get('agent_service_url', service_url) + agent_spawner = rcfg.get('agent_spawner', DEFAULT_AGENT_SPAWNER) + agent_config = rcfg.get('agent_config', DEFAULT_AGENT_CONFIG) agent_scheduler = rcfg.get('agent_scheduler') tunnel_bind_device = rcfg.get('tunnel_bind_device') default_queue = rcfg.get('default_queue') @@ -766,11 +767,11 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): raise RuntimeError("'global_virtenv' is deprecated (%s)" % resource) # Create a host:port string for use by the bootstrap_0. 
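# ------------------------------------------------------------------------------
# The host:port normalization performed just below, as a helper sketch; it
# uses radical.saga's Url as the patch does, with 27017 kept as the MongoDB
# default port:

import radical.saga as rs

def service_hostport(url, default_port=27017):

    u = rs.Url(url)
    return '%s:%d' % (u.host, u.port or default_port)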
- db_url = rs.Url(agent_dburl) - if db_url.port: - db_hostport = "%s:%d" % (db_url.host, db_url.port) + tmp = rs.Url(agent_service_url) + if tmp.port: + hostport = "%s:%d" % (tmp.host, tmp.port) else: - db_hostport = "%s:%d" % (db_url.host, 27017) # mongodb default + hostport = "%s:%d" % (tmp.host, 27017) # mongodb default # ---------------------------------------------------------------------- # the version of the agent is derived from @@ -940,7 +941,7 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): # set optional args if resource_manager == "CCM": bootstrap_args += " -c" if forward_tunnel_endpoint: bootstrap_args += " -f '%s'" % forward_tunnel_endpoint - if forward_tunnel_endpoint: bootstrap_args += " -h '%s'" % db_hostport + if forward_tunnel_endpoint: bootstrap_args += " -h '%s'" % hostport if python_interpreter: bootstrap_args += " -i '%s'" % python_interpreter if tunnel_bind_device: bootstrap_args += " -t '%s'" % tunnel_bind_device if cleanup: bootstrap_args += " -x '%s'" % cleanup @@ -958,7 +959,7 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): agent_cfg['scheduler'] = agent_scheduler agent_cfg['runtime'] = runtime agent_cfg['app_comm'] = app_comm - agent_cfg['dburl'] = str(database_url) + agent_cfg['service_url'] = service_url agent_cfg['sid'] = sid agent_cfg['pid'] = pid agent_cfg['pmgr'] = self._pmgr diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 9f951f6a41..eeec601bc1 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -5,14 +5,17 @@ import os import copy +import time + +import threading as mt import radical.utils as ru import radical.saga as rs import radical.saga.filesystem as rsfs import radical.saga.utils.pty_shell as rsup -from .constants import RESOURCE_CONFIG_LABEL_DEFAULT -from . import utils as rpu +from . import constants as rpc +from . import utils as rpu # ------------------------------------------------------------------------------ @@ -79,6 +82,7 @@ def __init__(self, service_url=None, uid=None, cfg=None, _primary=True): self._service = None self._closed = False self._primary = _primary + self._t_start = time.time() self._pmgrs = dict() # map IDs to pmgr instances self._tmgrs = dict() # map IDs to tmgr instances @@ -115,7 +119,7 @@ def __init__(self, service_url=None, uid=None, cfg=None, _primary=True): self._prof = self._get_profiler(name=self._uid) self._rep = self._get_reporter(name=self._uid) self._log = self._get_logger (name=self._uid, - level=self._cfg.get('debug')) + level=self._cfg.get('debug')) from . 
import version_detail as rp_version_detail self._log.info('radical.pilot version: %s' % rp_version_detail) @@ -136,59 +140,53 @@ def __init__(self, service_url=None, uid=None, cfg=None, _primary=True): 'js_shells' : dict(), 'fs_dirs' : dict()} - self._rep.info ('<>ok\n') + + if self._primary: + self._rep.ok('>>ok\n') # -------------------------------------------------------------------------- @@ -222,7 +220,9 @@ def close(self, terminate=True, download=False): if self._closed: return - self._rep.info('closing session %s' % self._uid) + if self._primary: + self._rep.info('closing session %s' % self._uid) + self._log.debug("session %s closing", self._uid) self._prof.prof("session_close", uid=self._uid) @@ -257,21 +257,114 @@ def close(self, terminate=True, download=False): self._closed = True + # after all is said and done, we attempt to download the pilot log- and # profiles, if so wanted - if download: + if self._primary and download: self._prof.prof("session_fetch_start", uid=self._uid) self._log.debug('start download') tgt = os.getcwd() - self.fetch_json (tgt='%s/%s' % (tgt, self.uid)) + # FIXME: MongoDB + # self.fetch_json (tgt='%s/%s' % (tgt, self.uid)) self.fetch_profiles(tgt=tgt) self.fetch_logfiles(tgt=tgt) self._prof.prof("session_fetch_stop", uid=self._uid) - self._rep.info('<>ok\n') + if self._primary: + self._t_stop = time.time() + self._rep.info('<>ok\n') + + + # -------------------------------------------------------------------------- + # + def _connect_proxy(self): + + assert(self._primary) + + # a primary session will create proxy comm channels + self._rep.info ('<= 1024: + # submit this bulk + task_docs = [u.as_dict() for u in tasks] + self.advance(task_docs, rps.TMGR_SCHEDULING_PENDING, + publish=True, push=True) + ret += tasks + tasks = list() - # insert tasks into the database, as a bulk. - task_docs = [u.as_dict() for u in tasks] - # self._session._dbs.insert_tasks(task_docs) + self._rep.progress_done() - # Only after the insert can we hand the tasks over to the next - # components (ie. advance state). - self.advance(task_docs, rps.TMGR_SCHEDULING_PENDING, - publish=True, push=True) + # submit remaining bulk (if any) + if tasks: + task_docs = [u.as_dict() for u in tasks] + self.advance(task_docs, rps.TMGR_SCHEDULING_PENDING, + publish=True, push=True) + ret += tasks - if ret_list: return tasks - else : return tasks[0] + if ret_list: return ret + else : return ret[0] # -------------------------------------------------------------------------- @@ -1136,7 +1013,8 @@ def cancel_tasks(self, uids=None): 'tmgr' : self.uid}}) # we also inform all pilots about the cancelation request - self._session._dbs.pilot_command(cmd='cancel_tasks', arg={'uids':uids}) + # FIXME: MongoDB + # self._session._dbs.pilot_command(cmd='cancel_tasks', arg={'uids':uids}) # In the default case of calling 'advance' above, we just set the state, # so we *know* tasks are canceled. 
But we nevertheless wait until that diff --git a/src/radical/pilot/tmgr/scheduler/base.py b/src/radical/pilot/tmgr/scheduler/base.py index 0386bcab23..3988397c8b 100644 --- a/src/radical/pilot/tmgr/scheduler/base.py +++ b/src/radical/pilot/tmgr/scheduler/base.py @@ -309,9 +309,11 @@ def _base_command_cb(self, topic, msg): to_cancel[pid].append(uid) for pid in to_cancel: - self._session._dbs.pilot_command(cmd='cancel_tasks', - arg={'uids' : to_cancel[pid]}, - pids=pid) + # FIXME: MongoDB + pass + # self._session._dbs.pilot_command(cmd='cancel_tasks', + # arg={'uids' : to_cancel[pid]}, + # pids=pid) return True diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py index a2ab54b72d..30350fbf82 100644 --- a/src/radical/pilot/tmgr/staging_input/default.py +++ b/src/radical/pilot/tmgr/staging_input/default.py @@ -34,13 +34,13 @@ # ------------------------------------------------------------------------------ # class Default(TMGRStagingInputComponent): - """ + ''' This component performs all tmgr side input staging directives for compute tasks. It gets tasks from the tmgr_staging_input_queue, in TMGR_STAGING_INPUT_PENDING state, will advance them to TMGR_STAGING_INPUT state while performing the staging, and then moves then to the AGENT_SCHEDULING_PENDING state, passing control to the agent. - """ + ''' # -------------------------------------------------------------------------- # @@ -63,10 +63,9 @@ def initialize(self): self.register_input(rps.TMGR_STAGING_INPUT_PENDING, rpc.TMGR_STAGING_INPUT_QUEUE, self.work) - self.register_publisher(rpc.AGENT_STAGING_INPUT_PUBSUB) - # this queue is inaccessible, needs routing via mongodb - self.register_output(rps.AGENT_STAGING_INPUT_PENDING, None) + self.register_output(rps.AGENT_STAGING_INPUT_PENDING, + rpc.PROXY_TASK_QUEUE) # we subscribe to the command channel to learn about pilots being added # to this task manager. @@ -126,26 +125,10 @@ def _advance_tasks(self, tasks, pid, state=None): if not state: state = rps.AGENT_STAGING_INPUT_PENDING - if pid not in self._connected: - - self._log.debug('=== send to mdb: %d', len(tasks)) - - for task in tasks: - # pass control via MongoDB - task['$all'] = True - task['control'] = 'agent_pending' - - # insert tasks into the database, as a bulk. 
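# ------------------------------------------------------------------------------
# The bulk flush in `submit_tasks` above amounts to fixed-size chunking; a
# generator sketch (the chunk size mirrors the 1024 used above):

def in_chunks(items, size=1024):

    # yield successive bulks; the last one may be shorter
    for i in range(0, len(items), size):
        yield items[i:i + size]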
- self._session._dbs.insert_tasks(tasks) - - # perform and publish state update - self.advance(tasks, state, publish=True, push=False) - - # publish to the agent_staging_input_pubsub + # push to the proxy queue self._log.debug('=== send to pq: %d', len(tasks)) - self.publish(rpc.AGENT_STAGING_INPUT_PUBSUB, msg=tasks, - topic=pid) + self.advance(tasks, state, publish=True, push=True, qname=pid) # -------------------------------------------------------------------------- @@ -223,7 +206,7 @@ def work(self, tasks): task_sboxes = sboxes[pid] - if len(task_sboxes) >= TASK_BULK_MKDIR_THRESHOLD: + if False or len(task_sboxes) >= TASK_BULK_MKDIR_THRESHOLD: session_sbox = self._session._get_session_sandbox(pilot) diff --git a/src/radical/pilot/tmgr/staging_output/default.py b/src/radical/pilot/tmgr/staging_output/default.py index c66df5ee64..c3c3d68938 100644 --- a/src/radical/pilot/tmgr/staging_output/default.py +++ b/src/radical/pilot/tmgr/staging_output/default.py @@ -40,7 +40,9 @@ def initialize(self): self._cache = dict() self.register_input(rps.TMGR_STAGING_OUTPUT_PENDING, - rpc.TMGR_STAGING_OUTPUT_QUEUE, self.work) + rpc.PROXY_TASK_QUEUE, + qname=self._session.uid, + cb=self.work) # we don't need an output queue -- tasks will be final diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index c94791fad5..c8229be2fc 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -676,14 +676,15 @@ def stop(self, timeout=None): # noqa # -------------------------------------------------------------------------- # - def register_input(self, states, input, worker=None): + def register_input(self, states, queue, cb=None, qname=None): ''' Using this method, the component can be connected to a queue on which things are received to be worked upon. The given set of states (which can be a single state or a list of states) will trigger an assert check upon thing arrival. - This method will further associate a thing state with a specific worker. + This method will further associate a thing state with a specific worker + callback `cb`. Upon thing arrival, the thing state will be used to lookup the respective worker, and the thing will be handed over. 
Workers should call self.advance(thing), in order to push the thing toward the next @@ -699,13 +700,17 @@ def register_input(self, states, input, worker=None): if not states: states = [None] # worker handles stateless entities - name = '%s.%s.%s' % (self.uid, worker.__name__, + if cb: cbname = cb.__name__ + else : cbname = 'none' + + name = '%s.%s.%s' % (self.uid, cbname, '_'.join([str(s) for s in states])) if name in self._inputs: raise ValueError('input %s already registered' % name) - self._inputs[name] = {'queue' : self.get_input_ep(input), + self._inputs[name] = {'queue' : self.get_input_ep(queue), + 'qname' : qname, 'states' : states} self._log.debug('registered input %s', name) @@ -720,14 +725,14 @@ def register_input(self, states, input, worker=None): if state in self._workers: self._log.warn("%s replaces worker %s (%s)" % (self.uid, self._workers[state], state)) - self._workers[state] = worker + self._workers[state] = cb - self._log.debug('registered worker %s [%s]', worker.__name__, state) + self._log.debug('registered worker %s [%s]', cbname, state) # -------------------------------------------------------------------------- # - def unregister_input(self, states, input, worker): + def unregister_input(self, states, queue, worker): ''' This methods is the inverse to the 'register_input()' method. ''' @@ -746,7 +751,7 @@ def unregister_input(self, states, input, worker): self._inputs[name]['queue'].stop() del(self._inputs[name]) - self._log.debug('unregistered input %s', name) + self._log.debug('=== unregistered input %s: %s', name, queue) for state in states: @@ -801,16 +806,16 @@ def register_output(self, states, output): # -------------------------------------------------------------------------- # - def get_input_ep(self, input): + def get_input_ep(self, queue): ''' return an input endpoint ''' # dig the addresses from the bridge's config file - fname = '%s/%s.cfg' % (self._cfg.path, input) + fname = '%s/%s.cfg' % (self._cfg.path, queue) cfg = ru.read_json(fname) - return ru.zmq.Getter(input, url=cfg['get']) + return ru.zmq.Getter(queue, url=cfg['get']) # -------------------------------------------------------------------------- @@ -1028,7 +1033,7 @@ def register_subscriber(self, pubsub, cb): log=self._log, prof=self._prof) - self._subscribers[pubsub].subscribe(topic=ru.as_string(pubsub), cb=cb, + self._subscribers[pubsub].subscribe(topic=pubsub, cb=cb, lock=self._cb_lock) @@ -1046,25 +1051,27 @@ def work_cb(self): # if no action occurs in this iteration, idle if not self._inputs: - time.sleep(0.01) + time.sleep(0.1) return True for name in self._inputs: - # self._log.debug('== work_cb %s', name) - - input = self._inputs[name]['queue'] + queue = self._inputs[name]['queue'] + qname = self._inputs[name]['qname'] states = self._inputs[name]['states'] # FIXME: a simple, 1-thing caching mechanism would likely # remove the req/res overhead completely (for any # non-trivial worker). 
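# ------------------------------------------------------------------------------
# The core of `work_cb` below: incoming things are grouped by state, and each
# bucket is handed to the worker registered for that state. A standalone
# sketch of the grouping step:

def bucket_by_state(things):

    buckets = dict()
    for thing in things:
        buckets.setdefault(thing['state'], list()).append(thing)

    return buckets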
- things = input.get_nowait(timeout=200) # microseconds + things = queue.get_nowait(qname=qname, timeout=200) # microseconds + # self._log.debug('work_cb %s: %s %s %d', name, queue.channel, + # qname, len(things)) if not things: - return True + continue - self._log.debug('== work_cb %s got %d ', name, len(things)) + # self._log.debug('work_cb %s:%s got %d (%s) : %s ', queue.channel, + # qname, len(things), things[0]['state'], states) # the worker target depends on the state of things, so we # need to sort the things into buckets by state before @@ -1082,7 +1089,7 @@ def work_cb(self): for state,things in buckets.items(): - assert(state in states), 'cannot handle state %s' % state + assert(state in states), 'cannot handle state %s' % state assert(state in self._workers), 'no worker for state %s' % state try: @@ -1129,8 +1136,8 @@ def work_cb(self): # -------------------------------------------------------------------------- # - def advance(self, things, state=None, publish=True, push=False, ts=None, - prof=True): + def advance(self, things, state=None, publish=True, push=False, qname=None, + ts=None, prof=True): ''' Things which have been operated upon are pushed down into the queues again, only to be picked up by the next component, according to their @@ -1141,6 +1148,7 @@ def advance(self, things, state=None, publish=True, push=False, ts=None, state: new state to set for the things publish: determine if state update notifications should be issued push: determine if things should be pushed to outputs + qname: output queue name to push to (if applicable) prof: determine if state advance creates a profile event (publish, and push are always profiled) @@ -1238,7 +1246,7 @@ def advance(self, things, state=None, publish=True, push=False, ts=None, if _state in rps.FINAL: # things in final state are dropped # for thing in _things: - # self._log.debug('final %s [%s]', thing['uid'], _state) + # self._log.debug('=== final %s [%s]', thing['uid'], _state) # self._prof.prof('drop', uid=thing['uid'], state=_state, # ts=ts) continue @@ -1246,7 +1254,7 @@ def advance(self, things, state=None, publish=True, push=False, ts=None, if _state not in self._outputs: # unknown target state -- error # for thing in _things: - # # self._log.debug("lost %s [%s]", thing['uid'], _state) + # self._log.debug('=== lost %s [%s]', thing['uid'], _state) # # self._prof.prof('lost', uid=thing['uid'], state=_state, # # ts=ts) continue @@ -1254,7 +1262,7 @@ def advance(self, things, state=None, publish=True, push=False, ts=None, if not self._outputs[_state]: # empty output -- drop thing # for thing in _things: - # self._log.debug('drop %s [%s]', thing['uid'], _state) + # self._log.debug('=== drop %s [%s]', thing['uid'], _state) # # self._prof.prof('drop', uid=thing['uid'], state=_state, # # ts=ts) continue @@ -1262,8 +1270,9 @@ def advance(self, things, state=None, publish=True, push=False, ts=None, output = self._outputs[_state] # push the thing down the drain - # self._log.debug('put bulk %s: %s', _state, len(_things)) - output.put(_things) + # self._log.debug('=== put bulk %s: %s: %s', _state, len(_things), + # output.channel) + output.put(_things, qname=qname) ts = time.time() # for thing in _things: diff --git a/src/radical/pilot/utils/prof_utils.py b/src/radical/pilot/utils/prof_utils.py index 35cfa4b185..3f19ed4a21 100644 --- a/src/radical/pilot/utils/prof_utils.py +++ b/src/radical/pilot/utils/prof_utils.py @@ -606,7 +606,7 @@ def get_session_profile(sid, src=None): # 
------------------------------------------------------------------------------ # -def get_session_description(sid, src=None, dburl=None): +def get_session_description(sid, src=None): """ This will return a description which is usable for radical.analytics evaluation. It informs about @@ -617,19 +617,12 @@ def get_session_description(sid, src=None, dburl=None): If `src` is given, it is interpreted as path to search for session information (json dump). `src` defaults to `$PWD/$sid`. - - if `dburl` is given, its value is used to fetch session information from - a database. The dburl value defaults to `RADICAL_PILOT_DBURL`. """ if not src: src = "%s/%s" % (os.getcwd(), sid) - if os.path.isfile('%s/%s.json' % (src, sid)): - json = ru.read_json('%s/%s.json' % (src, sid)) - else: - ftmp = fetch_json(sid=sid, dburl=dburl, tgt=src, skip_existing=True) - json = ru.read_json(ftmp) + json = ru.read_json('%s/%s.json' % (src, sid)) # make sure we have uids # FIXME v0.47: deprecate diff --git a/src/radical/pilot/utils/session.py b/src/radical/pilot/utils/session.py index f184e7e8fb..f3add31113 100644 --- a/src/radical/pilot/utils/session.py +++ b/src/radical/pilot/utils/session.py @@ -13,7 +13,7 @@ # ------------------------------------------------------------------------------ # -def fetch_profiles (sid, dburl=None, src=None, tgt=None, access=None, +def fetch_profiles (sid, src=None, tgt=None, access=None, session=None, skip_existing=False, fetch_client=False, log=None): ''' sid: session for which all profiles are fetched @@ -34,12 +34,6 @@ def fetch_profiles (sid, dburl=None, src=None, tgt=None, access=None, ret = list() - if not dburl: - dburl = os.environ['RADICAL_PILOT_DBURL'] - - if not dburl: - raise ValueError('RADICAL_PILOT_DBURL is not set') - if not src: src = os.getcwd() @@ -81,12 +75,12 @@ def fetch_profiles (sid, dburl=None, src=None, tgt=None, access=None, if not os.path.isfile(client_profile): raise RuntimeError('profile %s does not exist' % client_profile) - _, db, _, _, _ = ru.mongodb_connect (dburl) - - json_docs = get_session_docs(db, sid) - - pilots = json_docs['pilot'] + # FIXME: MongoDB + json_docs = ... + return + pilots = json_docs['pilot'] num_pilots = len(pilots) + log.debug("Session: %s", sid) log.debug("Number of pilots in session: %d", num_pilots) @@ -195,7 +189,7 @@ def fetch_profiles (sid, dburl=None, src=None, tgt=None, access=None, # ------------------------------------------------------------------------------ # -def fetch_logfiles (sid, dburl=None, src=None, tgt=None, access=None, +def fetch_logfiles (sid, src=None, tgt=None, access=None, session=None, skip_existing=False, fetch_client=False, log=None): ''' sid: session for which all logfiles are fetched @@ -214,12 +208,6 @@ def fetch_logfiles (sid, dburl=None, src=None, tgt=None, access=None, ret = list() - if not dburl: - dburl = os.environ['RADICAL_PILOT_DBURL'] - - if not dburl: - raise RuntimeError ('Please set RADICAL_PILOT_DBURL') - if not src: src = os.getcwd() @@ -254,12 +242,13 @@ def fetch_logfiles (sid, dburl=None, src=None, tgt=None, access=None, log_file.copy(ftgt, flags=rs.fs.CREATE_PARENTS) log_file.close() - _, db, _, _, _ = ru.mongodb_connect (dburl) - json_docs = get_session_docs(db, sid) - - pilots = json_docs['pilot'] + # FIXME: MongoDB + json_docs = ... 
+ return + pilots = json_docs['pilot'] num_pilots = len(pilots) + log.info("Session: %s", sid) log.info("Number of pilots in session: %d", num_pilots) @@ -370,7 +359,7 @@ def fetch_logfiles (sid, dburl=None, src=None, tgt=None, access=None, # ------------------------------------------------------------------------------ # -def fetch_json(sid, dburl=None, tgt=None, skip_existing=False, session=None, +def fetch_json(sid, tgt=None, skip_existing=False, session=None, log=None): ''' returns file name @@ -402,21 +391,11 @@ def fetch_json(sid, dburl=None, tgt=None, skip_existing=False, session=None, log.info("session already in %s", dst) else: - if not dburl: - dburl = os.environ.get('RADICAL_PILOT_DBURL') - - if not dburl: - raise ValueError('RADICAL_PILOT_DBURL is not set') - - mongo, db, _, _, _ = ru.mongodb_connect(dburl) - - json_docs = get_session_docs(db, sid) + json_docs = get_session_docs(sid) ru.write_json(json_docs, dst) log.info("session written to %s", dst) - mongo.close() - rep.ok("+ %s (json)\n" % sid) return dst diff --git a/src/radical/pilot/worker/stager.py b/src/radical/pilot/worker/stager.py index 9372addb2f..d39be8093f 100644 --- a/src/radical/pilot/worker/stager.py +++ b/src/radical/pilot/worker/stager.py @@ -37,7 +37,6 @@ def __init__(self, cfg, session): def initialize(self): self._sid = self._cfg['sid'] - self._dburl = self._cfg['dburl'] self.register_input(rps.NEW, rpc.STAGER_REQUEST_QUEUE, self.work) self.register_publisher(rpc.STAGER_RESPONSE_PUBSUB) From 577ffd40b58cfd2d872179d9fbe1f4cc049d17f5 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sun, 28 Mar 2021 11:49:52 +0200 Subject: [PATCH 016/171] snapshot --- bin/radical-pilot-proxy-server | 1 - examples/00_getting_started.py | 6 ++--- .../pilot/agent/launch_method/aprun.py | 4 ++-- src/radical/pilot/agent/launch_method/base.py | 2 -- src/radical/pilot/agent/launch_method/fork.py | 21 +++++------------ .../pilot/agent/launch_method/ibrun.py | 23 ++++++++----------- src/radical/pilot/pilot_description.py | 5 +--- src/radical/pilot/task.py | 2 +- src/radical/pilot/task_description.py | 5 +--- src/radical/pilot/task_manager.py | 14 +++++------ 10 files changed, 30 insertions(+), 53 deletions(-) diff --git a/bin/radical-pilot-proxy-server b/bin/radical-pilot-proxy-server index f84c535b0e..017e352f89 100755 --- a/bin/radical-pilot-proxy-server +++ b/bin/radical-pilot-proxy-server @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import sys -import zmq import time import queue diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index 323475051f..13ef3c2b02 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -56,9 +56,9 @@ 'runtime' : 120, # pilot runtime (min) 'exit_on_error' : True, 'project' : config[resource].get('project', None), - 'queue' : config[resource].get('queue', None), - 'access_schema' : config[resource].get('schema', None), - 'nodes' : 1024 * 4, + 'queue' : config[resource].get('queue', None), + 'access_schema' : config[resource].get('schema', None), + 'cores' : config[resource].get('cores', None), } pdesc = rp.PilotDescription(pd_init) diff --git a/src/radical/pilot/agent/launch_method/aprun.py b/src/radical/pilot/agent/launch_method/aprun.py index ee3ed78c98..3b3c5f2b39 100644 --- a/src/radical/pilot/agent/launch_method/aprun.py +++ b/src/radical/pilot/agent/launch_method/aprun.py @@ -54,8 +54,8 @@ def construct_command(self, t, launch_script_hop): # 'cpu_threads' : 2, # 'gpu_processes : 2, # 'slots': - # { # 'nodes': [{'name': node_name, - # # 'uid': 
node_uid, + # { # 'ranks': [{'node': node_name, + # # 'node_id': node_uid, # # 'core_map': [core_map], # # 'gpu_map': [gpu_map], # # 'lfs': lfs}], diff --git a/src/radical/pilot/agent/launch_method/base.py b/src/radical/pilot/agent/launch_method/base.py index ca4e50665c..91940735b1 100644 --- a/src/radical/pilot/agent/launch_method/base.py +++ b/src/radical/pilot/agent/launch_method/base.py @@ -203,7 +203,6 @@ def rm_config_hook(cls, name, cfg, rm, log, profiler): if cls != LaunchMethod: raise TypeError("LaunchMethod config hook only available to base class!") - from .fork import Fork from .prte import PRTE from .prte2 import PRTE2 from .flux import Flux @@ -215,7 +214,6 @@ def rm_config_hook(cls, name, cfg, rm, log, profiler): # from .orte import ORTE impl = { - LM_NAME_FORK : Fork, LM_NAME_PRTE : PRTE, LM_NAME_PRTE2 : PRTE2, LM_NAME_FLUX : Flux, diff --git a/src/radical/pilot/agent/launch_method/fork.py b/src/radical/pilot/agent/launch_method/fork.py index 76502fbd0f..1ffb045d1a 100644 --- a/src/radical/pilot/agent/launch_method/fork.py +++ b/src/radical/pilot/agent/launch_method/fork.py @@ -20,15 +20,9 @@ def __init__(self, name, cfg, session): # -------------------------------------------------------------------------- # def _configure(self): - # "Regular" tasks + self.launch_command = '' - # -------------------------------------------------------------------------- - # - @classmethod - def rm_config_hook(cls, name, cfg, rm, log, profiler): - return {'version_info': { - name: {'version': '0.42', 'version_detail': 'There is no spoon'}}} # -------------------------------------------------------------------------- # @@ -38,16 +32,13 @@ def construct_command(self, t, launch_script_hop): # the needful td = t['description'] - task_exec = td['executable'] - task_args = td.get('arguments') or [] - task_argstr = self._create_arg_string(task_args) + task_exec = td['executable'] + task_args = td.get('arguments') or [] + task_argstr = self._create_arg_string(task_args) - if task_argstr: - command = "%s %s" % (task_exec, task_argstr) - else: - command = task_exec + command = "%s %s" % (task_exec, task_argstr) - return command, None + return command.strip(), None # ------------------------------------------------------------------------------ diff --git a/src/radical/pilot/agent/launch_method/ibrun.py b/src/radical/pilot/agent/launch_method/ibrun.py index b1b5beba8b..1ce595c4b6 100644 --- a/src/radical/pilot/agent/launch_method/ibrun.py +++ b/src/radical/pilot/agent/launch_method/ibrun.py @@ -34,15 +34,15 @@ def _configure(self): # def construct_command(self, t, launch_script_hop): - slots = t['slots'] + slots = t['slots'] td = t['description'] - task_exec = td['executable'] - task_args = td.get('arguments') or [] - task_argstr = self._create_arg_string(task_args) - task_env = td.get('environment') or dict() + task_exec = td['executable'] + task_args = td.get('arguments') or [] + task_argstr = self._create_arg_string(task_args) + task_env = td.get('environment') or {} - n_tasks = td['cpu_processes'] + n_tasks = td['cpu_processes'] # Usage of env variable TACC_TASKS_PER_NODE is purely for MPI tasks, # and threads are not considered (info provided by TACC support) @@ -74,16 +74,11 @@ def construct_command(self, t, launch_script_hop): if offsets: ibrun_offset = min(offsets) - if task_argstr: - task_command = "%s %s" % (task_exec, task_argstr) - else: - task_command = task_exec - - ibrun_command = "%s -n %s -o %d %s" % \ + ibrun_command = "%s -n %s -o %d %s %s" % \ (self.launch_command, n_tasks, - 
ibrun_offset, task_command) + ibrun_offset, task_exec, task_argstr) - return ibrun_command, None + return ibrun_command.strip(), None # ------------------------------------------------------------------------------ diff --git a/src/radical/pilot/pilot_description.py b/src/radical/pilot/pilot_description.py index ed86a45fd1..e9ef359340 100644 --- a/src/radical/pilot/pilot_description.py +++ b/src/radical/pilot/pilot_description.py @@ -250,10 +250,7 @@ class PilotDescription(ru.Description): # def __init__(self, from_dict=None): - ru.Description.__init__(self, from_dict=PilotDescription._defaults) - - if from_dict: - self.update(from_dict) + ru.Description.__init__(self, from_dict=from_dict) # -------------------------------------------------------------------------- diff --git a/src/radical/pilot/task.py b/src/radical/pilot/task.py index 168aefa28a..641e2b65f0 100644 --- a/src/radical/pilot/task.py +++ b/src/radical/pilot/task.py @@ -466,7 +466,7 @@ def wait(self, state=None, timeout=None): # we will never see another state progression. Raise an error # (unless we waited for this) if self.state in states: - return + return self.state # FIXME: do we want a raise here, really? This introduces a race, # really, on application level diff --git a/src/radical/pilot/task_description.py b/src/radical/pilot/task_description.py index c1d3e0ce32..47aa5a81d9 100644 --- a/src/radical/pilot/task_description.py +++ b/src/radical/pilot/task_description.py @@ -420,10 +420,7 @@ class TaskDescription(ru.Description): # def __init__(self, from_dict=None): - ru.Description.__init__(self, from_dict=TaskDescription._defaults) - - if from_dict: - self.update(from_dict) + ru.Description.__init__(self, from_dict=from_dict) # -------------------------------------------------------------------------- diff --git a/src/radical/pilot/task_manager.py b/src/radical/pilot/task_manager.py index e3339314a5..6d093e3478 100644 --- a/src/radical/pilot/task_manager.py +++ b/src/radical/pilot/task_manager.py @@ -14,7 +14,7 @@ from . import states as rps from . import constants as rpc -from . import task_description as rpcud +from . import task_description as rptd # bulk callbacks are implemented, but are currently not used nor exposed. @@ -334,8 +334,8 @@ def _pilot_state_cb(self, pilots, state=None): self._log.debug('task %s is restartable', task['uid']) task['restarted'] = True - ud = rpcud.TaskDescription(task['description']) - to_restart.append(ud) + td = rptd.TaskDescription(task['description']) + to_restart.append(td) # FIXME: increment some restart counter in the description? # FIXME: reference the resulting new uid in the old task. @@ -737,12 +737,12 @@ def submit_tasks(self, descriptions): self._rep.progress_tgt(len(descriptions), label='submit') tasks = list() ret = list() - for ud in descriptions: + for td in descriptions: - if not ud.executable: + if not td.executable: raise ValueError('task executable must be defined') - task = Task(tmgr=self, descr=ud) + task = Task(tmgr=self, descr=td) tasks.append(task) # keep tasks around @@ -830,7 +830,7 @@ def wait_units(self, uids=None, state=None, timeout=None): # def wait_tasks(self, uids=None, state=None, timeout=None): """ - Returns when one or more :class:`radical.pilot.Tasks` reach a + Returns when the given :class:`radical.pilot.Tasks` reach a specific state. 
If `uids` is `None`, `wait_tasks` returns when **all** From 186e56f7202a86e8874326cc09f559555eb342b6 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 27 Sep 2021 14:31:55 +0200 Subject: [PATCH 017/171] fixed port for bridge --- bin/radical-pilot-agent-bridge | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/radical-pilot-agent-bridge b/bin/radical-pilot-agent-bridge index 955b1befe7..ea1f3329e3 100755 --- a/bin/radical-pilot-agent-bridge +++ b/bin/radical-pilot-agent-bridge @@ -140,7 +140,7 @@ class ZMQBridge(ru.zmq.Server): self._lock = mt.Lock() self._clients = dict() - ru.zmq.Server.__init__(self) + ru.zmq.Server.__init__(self, url='tcp://*:44444') self._monitor = mt.Thread(target=self._monitor) self._monitor.daemon = True From 7b5b21d62412e4837526fadf769d9542da90a101 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Fri, 26 Nov 2021 20:56:11 +0100 Subject: [PATCH 018/171] merge-di-merge --- examples/00_getting_started.py | 25 - src/radical/pilot/agent/agent_0.py | 224 +-------- .../pilot/agent/launch_method/aprun.py | 18 - src/radical/pilot/agent/launch_method/base.py | 19 - src/radical/pilot/agent/launch_method/flux.py | 4 - src/radical/pilot/agent/launch_method/fork.py | 18 - .../pilot/agent/launch_method/ibrun.py | 20 - src/radical/pilot/agent/scheduler/base.py | 440 +++--------------- .../pilot/agent/scheduler/continuous.py | 5 - src/radical/pilot/configs/agent_default.json | 16 +- src/radical/pilot/configs/resource_debug.json | 19 +- src/radical/pilot/pilot.py | 3 - src/radical/pilot/pilot_manager.py | 24 - src/radical/pilot/pmgr/launching/default.py | 23 +- 14 files changed, 75 insertions(+), 783 deletions(-) diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index c95a2a71eb..5eb12190f7 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -43,11 +43,7 @@ # read the config used for resource details config = ru.read_json('%s/config.json' -<<<<<<< HEAD - % os.path.dirname(os.path.abspath(__file__))) -======= % os.path.dirname(__file__)).get(resource, {}) ->>>>>>> devel pmgr = rp.PilotManager(session=session) tmgr = rp.TaskManager(session=session) @@ -60,18 +56,11 @@ pd_init = {'resource' : resource, 'runtime' : 120, # pilot runtime (min) 'exit_on_error' : True, -<<<<<<< HEAD - 'project' : config[resource].get('project', None), - 'queue' : config[resource].get('queue', None), - 'access_schema' : config[resource].get('schema', None), - 'cores' : config[resource].get('cores', None), -======= 'project' : config.get('project', None), 'queue' : config.get('queue', None), 'access_schema' : config.get('schema', None), 'cores' : config.get('cores', None), 'gpus' : config.get('gpus', 0), ->>>>>>> devel } pdesc = rp.PilotDescription(pd_init) @@ -91,19 +80,6 @@ tds = list() for i in range(0, n): - # create a new task description, and fill it. -<<<<<<< HEAD - tds.append(rp.TaskDescription({ - - 'sandbox' : 'task_sandbox', - 'executable' : '%s/examples/hello_rp.sh' % os.getcwd(), - 'arguments' : [random.randint(1,10) + 10], - 'cpu_process_type': rp.MPI, - 'cpu_processes' : random.choice([1, 2, 4, 8, 16]), - 'cpu_threads' : random.randint(1,2), - 'gpu_processes' : random.choice([0,0,0,0,0,0,1,2]), - })) -======= # Here we don't use dict initialization. 
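# ------------------------------------------------------------------------------
# The bridge above now binds a fixed, well-known port; a subclass sketch
# following the same pattern (the port number mirrors the patch, the class
# name is illustrative):

import radical.utils as ru

class FixedPortBridge(ru.zmq.Server):

    def __init__(self):

        ru.zmq.Server.__init__(self, url='tcp://*:44444')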
td = rp.TaskDescription() td.stage_on_error = True @@ -111,7 +87,6 @@ td.cpu_processes = 1 tds.append(td) ->>>>>>> devel report.progress() report.progress_done() diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 969f51adef..ab35ee51d6 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -7,13 +7,10 @@ import pprint import stat import time -<<<<<<< HEAD import pprint import threading as mt import subprocess as sp import multiprocessing as mp -======= ->>>>>>> devel import radical.utils as ru @@ -50,12 +47,8 @@ def __init__(self, cfg, session): self._pmgr = cfg.pmgr self._pwd = cfg.pilot_sandbox self._session = session -<<<<<<< HEAD self._sid = self._session.uid self._log = session._log -======= - self._log = ru.Logger(self._uid, ns='radical.pilot') ->>>>>>> devel self._starttime = time.time() self._final_cause = None @@ -108,11 +101,7 @@ def __init__(self, cfg, session): # run our own slow-paced heartbeat monitor to watch pmgr heartbeats # FIXME: we need to get pmgr freq -<<<<<<< HEAD freq = 100 -======= - freq = 60 ->>>>>>> devel tint = freq / 3 tout = freq * 10 self._hb = ru.Heartbeat(uid=self._uid, @@ -305,19 +294,6 @@ def initialize(self): 'cpu' : rm_info['cores_per_node'] * n_nodes, 'gpu' : rm_info['gpus_per_node'] * n_nodes}} -<<<<<<< HEAD -======= - # sub-agents are started, components are started, bridges are up: we are - # ready to roll! Update pilot state. - pilot = {'type' : 'pilot', - 'uid' : self._pid, - 'state' : rps.PMGR_ACTIVE, - 'resource_details' : { - # 'lm_info' : self._rm.lm_info.get('version_info'), - # 'lm_detail' : self._rm.lm_info.get('lm_detail'), - 'rm_info' : self._rm.info}, - '$set' : ['resource_details']} ->>>>>>> devel self.advance(pilot, publish=True, push=False) @@ -361,7 +337,6 @@ def finalize(self): if self._rm: self._rm.stop() -<<<<<<< HEAD self._log.info('rusage: %s', rpu.get_rusage()) out, err, log = '', '', '' @@ -372,9 +347,8 @@ def finalize(self): except: pass try : log = open('./agent.0.log', 'r').read(1024) except: pass -======= + self._reg_service.stop() ->>>>>>> devel if self._final_cause == 'timeout' : state = rps.DONE elif self._final_cause == 'cancel' : state = rps.CANCELED @@ -393,31 +367,10 @@ def finalize(self): 'logfile': log, 'state' : state} -<<<<<<< HEAD self._log.debug('=== push final state update') self._log.debug('update state: %s: %s', state, self._final_cause) self.publish(rpc.PROXY_STATE_PUBSUB, topic=rpc.STATE_PUBSUB, msg=[pilot]) -======= - out, err, log = '', '', '' - - try : out = ru.ru_open('./agent.0.out', 'r').read(1024) - except: pass - try : err = ru.ru_open('./agent.0.err', 'r').read(1024) - except: pass - try : log = ru.ru_open('./agent.0.log', 'r').read(1024) - except: pass - - ret = self._dbs._c.update({'type' : 'pilot', - 'uid' : self._pid}, - {'$set' : {'stdout' : rpu.tail(out), - 'stderr' : rpu.tail(err), - 'logfile': rpu.tail(log), - 'state' : state}, - '$push': {'states' : state} - }) - self._log.debug('update ret: %s', ret) ->>>>>>> devel # -------------------------------------------------------------------- @@ -603,64 +556,6 @@ def _start_sub_agents(self): 'lfs' : 0, 'mem' : 0}]} } -<<<<<<< HEAD - cmd, hop = agent_lm.construct_command(agent_cmd, - launch_script_hop='/usr/bin/env RP_SPAWNER_HOP=TRUE "%s"' % ls_name) - - with open (ls_name, 'w') as ls: - # note that 'exec' only makes sense if we don't add any - # commands (such as post-processing) after it. 
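# ------------------------------------------------------------------------------
# The launch-script layout used for sub-agents around this point, as a
# standalone sketch: exported environment first, then pre_exec commands, then
# an exec of the actual command (the helper name is illustrative):

import os
import stat

def write_launch_script(path, env, pre_exec, cmd):

    with open(path, 'w') as fout:
        fout.write('#!/bin/sh\n\n')
        for k, v in env.items():
            fout.write('export "%s"="%s"\n' % (k, v))
        fout.write('\n')
        for line in pre_exec:
            fout.write('%s\n' % line)
        fout.write('\nexec %s\n\n' % cmd)

    st = os.stat(path)
    os.chmod(path, st.st_mode | stat.S_IEXEC)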
- ls.write('#!/bin/sh\n\n') - for k,v in agent_cmd['description'].get('environment', {}).items(): - ls.write('export "%s"="%s"\n' % (k, v)) - ls.write('\n') - for pe_cmd in agent_cmd['description'].get('pre_exec', []): - ls.write('%s\n' % pe_cmd) - ls.write('\n') - ls.write('exec %s\n\n' % cmd) - st = os.stat(ls_name) - os.chmod(ls_name, st.st_mode | stat.S_IEXEC) - - if hop : cmdline = hop - else : cmdline = ls_name - - # ------------------------------------------------------------------ - class _SA(mp.Process): - - def __init__(self, sa, cmd, log): - self._name = sa - self._cmd = cmd.split() - self._log = log - self._proc = None - super(_SA, self).__init__(name=self._name) - - self.start() - - - def run(self): - - sys.stdout = open('%s.out' % self._name, 'w') - sys.stderr = open('%s.err' % self._name, 'w') - out = open('%s.out' % self._name, 'w') - err = open('%s.err' % self._name, 'w') - self._proc = sp.Popen(args=self._cmd, stdout=out, stderr=err) - self._log.debug('sub-agent %s spawned [%s]', self._name, - self._proc) - - assert(self._proc) - - # FIXME: lifetime, use daemon agent launcher - while True: - time.sleep(0.1) - if self._proc.poll() is None: - return True # all is well - else: - return False # proc is gone - terminate - # ------------------------------------------------------------------ - - # spawn the sub-agent - assert(cmdline) -======= # find a launcher to use launcher = self._rm.find_launcher(agent_task) @@ -695,7 +590,6 @@ def run(self): # spawn the sub-agent cmdline = launch_script ->>>>>>> devel self._log.info ('create sub-agent %s: %s' % (sa, cmdline)) ru.sh_callout_bg(cmdline, stdout='%s.out' % sa, stderr='%s.err' % sa) @@ -730,37 +624,12 @@ def _proxy_state_cb(self, topic, msg): self.publish(rpc.PROXY_STATE_PUBSUB, topic=topic, msg=msg) -<<<<<<< HEAD # -------------------------------------------------------------------------- # def _proxy_control_cb(self, topic, msg): -======= - self._log.debug('pilot command: %s: %s', cmd, arg) - self._prof.prof('cmd', msg="%s : %s" % (cmd, arg), uid=self._pid) - - if cmd == 'heartbeat' and arg['pmgr'] == self._pmgr: - self._hb.beat(uid=self._pmgr) - - elif cmd == 'cancel_pilot': - self._log.info('cancel pilot cmd') - self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'terminate', - 'arg' : None}) - self._final_cause = 'cancel' - self.stop() - - return False # we are done - - elif cmd == 'cancel_tasks': - self._log.info('cancel_tasks cmd') - self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'cancel_tasks', - 'arg' : arg}) - else: - self._log.warn('could not interpret cmd "%s" - ignore', cmd) ->>>>>>> devel self._log.debug('=== proxy control: %s', msg) - cmd = msg['cmd'] arg = msg['arg'] @@ -773,10 +642,6 @@ def _proxy_control_cb(self, topic, msg): self._hb.beat(uid=self._pmgr) return True -<<<<<<< HEAD -======= - self._log.debug('rpc req: %s', rpc_req) ->>>>>>> devel if cmd == 'prep_env': @@ -834,12 +699,10 @@ def _check_control(self, _, msg): ret = None rpc_res = {'uid': arg['uid']} + try: -<<<<<<< HEAD - print(arg) ret = None -======= ->>>>>>> devel + if req == 'hello' : ret = 'hello %s' % ' '.join(arg['arg']) @@ -865,91 +728,10 @@ def _check_control(self, _, msg): return True - # -------------------------------------------------------------------------- - # -<<<<<<< HEAD - def _prepare_env(self, eid, env_spec): -======= - def _check_state(self): - - # Make sure that we haven't exceeded the runtime - otherwise terminate. 
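# ------------------------------------------------------------------------------
# The sub-agent spawn above, in sketch form: run the generated launch script
# in the background with stdout/stderr redirected. `subprocess` stands in for
# the `ru.sh_callout_bg` call used by the patch:

import subprocess

def spawn_bg(cmdline, name):

    out = open('%s.out' % name, 'w')
    err = open('%s.err' % name, 'w')
    return subprocess.Popen(cmdline.split(), stdout=out, stderr=err)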
- if self._cfg.runtime: - - if time.time() >= self._starttime + (int(self._cfg.runtime) * 60): - - self._log.info('runtime limit (%ss).', self._cfg.runtime * 60) - self._final_cause = 'timeout' - self.stop() - return False # we are done - - return True - - - # -------------------------------------------------------------------------- - # - def _check_tasks_cb(self): - - # Check for tasks waiting for input staging and log pull. - # - # FIXME: Unfortunately, 'find_and_modify' is not bulkable, so we have - # to use 'find'. To avoid finding the same tasks over and over - # again, we update the 'control' field *before* running the next - # find -- so we do it right here. - # This also blocks us from using multiple ingest threads, or from - # doing late binding by task pull :/ - task_cursor = self._dbs._c.find({'type' : 'task', - 'pilot' : self._pid, - 'control' : 'agent_pending'}) - if not task_cursor.count(): - self._log.info('tasks pulled: 0') - return True - - # update the tasks to avoid pulling them again next time. - task_list = list(task_cursor) - task_uids = [task['uid'] for task in task_list] - - self._dbs._c.update({'type' : 'task', - 'uid' : {'$in' : task_uids}}, - {'$set' : {'control' : 'agent'}}, - multi=True) - - self._log.info("tasks pulled: %4d", len(task_list)) - self._prof.prof('get', msg='bulk: %d' % len(task_list), uid=self._pid) - - for task in task_list: - - # make sure the tasks obtain env settings (if needed) - if 'task_environment' in self._cfg: - if not task['description'].get('environment'): - task['description']['environment'] = dict() - for k,v in self._cfg['task_environment'].items(): - task['description']['environment'][k] = v - - # we need to make sure to have the correct state: - task['state'] = rps._task_state_collapse(task['states']) - self._prof.prof('get', uid=task['uid']) - - # FIXME: raise or fail task! - if task['state'] != rps.AGENT_STAGING_INPUT_PENDING: - self._log.error('invalid state: %s', (pprint.pformat(task))) - - task['control'] = 'agent' - - # now we really own the CUs, and can start working on them (ie. push - # them into the pipeline). We don't publish nor profile as advance, - # since that happened already on the module side when the state was set. 
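The pull logic above implements a two-step claim protocol: fetch all tasks still marked `agent_pending` for this pilot, then immediately flip their `control` field so that the next poll cannot return them again. A standalone sketch of that protocol, assuming a pymongo-2.x style collection such as the `self._dbs._c` used here:

    def pull_pending_tasks(coll, pid):
        # step 1: find all tasks which still wait for this agent
        tasks = list(coll.find({'type'   : 'task',
                                'pilot'  : pid,
                                'control': 'agent_pending'}))
        if not tasks:
            return []

        # step 2: claim them, so that the next `find` does not return
        #         the same tasks again
        uids = [task['uid'] for task in tasks]
        coll.update({'type': 'task', 'uid': {'$in': uids}},
                    {'$set': {'control': 'agent'}}, multi=True)

        return tasks

As the comments above note, this two-step scheme is what rules out multiple ingest threads: between the `find` and the `update`, a second puller would see (and claim) the same tasks.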
- self.advance(task_list, publish=False, push=True) - - return True - - # -------------------------------------------------------------------------- # def _prepare_env(self, env_name, env_spec): - print(env_spec) ->>>>>>> devel - etype = env_spec['type'] evers = env_spec['version'] emods = env_spec.get('setup') or [] diff --git a/src/radical/pilot/agent/launch_method/aprun.py b/src/radical/pilot/agent/launch_method/aprun.py index a662d82c9c..9dd66c71fc 100644 --- a/src/radical/pilot/agent/launch_method/aprun.py +++ b/src/radical/pilot/agent/launch_method/aprun.py @@ -90,23 +90,6 @@ def get_launch_cmds(self, task, exec_path): # 'cpu_threads' : 2, # 'gpu_processes : 2, # 'slots': -<<<<<<< HEAD - # { # 'ranks': [{'node': node_name, - # # 'node_id': node_uid, - # # 'core_map': [core_map], - # # 'gpu_map': [gpu_map], - # # 'lfs': lfs}], - # 'nodes' : [{ 'name': node_1, - # 'uid': node_uid_1, - # 'core_map': [[0, 2], [4, 6]], - # 'gpu_map': [[0]], - # 'lfs': 1024}, - # { 'name': node_2, - # 'uid': node_uid_2, - # 'core_map': [[1, 3], [5, 7]], - # 'gpu_map': [[0]], - # 'lfs': 1024} -======= # { # 'ranks': [{'node_name': node_name, # # 'node_id' : node_id, # # 'core_map' : [core_map], @@ -122,7 +105,6 @@ def get_launch_cmds(self, task, exec_path): # 'core_map' : [[1, 3], [5, 7]], # 'gpu_map' : [[0]], # 'lfs' : 1024} ->>>>>>> devel # ], # 'cores_per_node': 8, # 'gpus_per_node' : 1, diff --git a/src/radical/pilot/agent/launch_method/base.py b/src/radical/pilot/agent/launch_method/base.py index aec5f75cc0..452da8c9f2 100644 --- a/src/radical/pilot/agent/launch_method/base.py +++ b/src/radical/pilot/agent/launch_method/base.py @@ -155,31 +155,12 @@ def _init_from_scratch(self, env, env_sh): raise NotImplementedError("incomplete LaunchMethod %s" % self.name) -<<<<<<< HEAD - from .prte import PRTE - from .prte2 import PRTE2 - from .flux import Flux - from .jsrun import JSRUN - from .yarn import Yarn - from .spark import Spark -======= ->>>>>>> devel # -------------------------------------------------------------------------- # def _init_from_info(self, lm_info): -<<<<<<< HEAD - impl = { - LM_NAME_PRTE : PRTE, - LM_NAME_PRTE2 : PRTE2, - LM_NAME_FLUX : Flux, - LM_NAME_JSRUN : JSRUN, - LM_NAME_YARN : Yarn, - LM_NAME_SPARK : Spark, -======= raise NotImplementedError("incomplete LaunchMethod %s" % self.name) ->>>>>>> devel # -------------------------------------------------------------------------- diff --git a/src/radical/pilot/agent/launch_method/flux.py b/src/radical/pilot/agent/launch_method/flux.py index 3d34f401c6..91c0d72940 100644 --- a/src/radical/pilot/agent/launch_method/flux.py +++ b/src/radical/pilot/agent/launch_method/flux.py @@ -63,11 +63,7 @@ def _init_from_info(self, lm_info): self._fh = ru.FluxHelper() self._fh.connect_flux(uri=self._details['flux_uri']) -<<<<<<< HEAD - log.info('starting flux watcher') -======= self._prof.prof('flux_reconnect_ok') ->>>>>>> devel # -------------------------------------------------------------------------- diff --git a/src/radical/pilot/agent/launch_method/fork.py b/src/radical/pilot/agent/launch_method/fork.py index 18979bdd02..beba1625b0 100644 --- a/src/radical/pilot/agent/launch_method/fork.py +++ b/src/radical/pilot/agent/launch_method/fork.py @@ -23,12 +23,6 @@ def __init__(self, name, lm_cfg, rm_info, log, prof): # -------------------------------------------------------------------------- # -<<<<<<< HEAD - def _configure(self): - - self.launch_command = '' - -======= def _init_from_scratch(self, env, env_sh): lm_info = {'env' : env, @@ -44,7 +38,6 @@ 
def _init_from_info(self, lm_info): self._env = lm_info['env'] self._env_sh = lm_info['env_sh'] ->>>>>>> devel # -------------------------------------------------------------------------- # @@ -52,16 +45,6 @@ def finalize(self): pass -<<<<<<< HEAD - td = t['description'] - task_exec = td['executable'] - task_args = td.get('arguments') or [] - task_argstr = self._create_arg_string(task_args) - - command = "%s %s" % (task_exec, task_argstr) - - return command.strip(), None -======= # -------------------------------------------------------------------------- # @@ -112,7 +95,6 @@ def get_rank_exec(self, task, rank_id, rank): command = '%s %s' % (task_exec, task_argstr) return command.rstrip() ->>>>>>> devel # ------------------------------------------------------------------------------ diff --git a/src/radical/pilot/agent/launch_method/ibrun.py b/src/radical/pilot/agent/launch_method/ibrun.py index 5f32a0f848..8a264ce072 100644 --- a/src/radical/pilot/agent/launch_method/ibrun.py +++ b/src/radical/pilot/agent/launch_method/ibrun.py @@ -67,17 +67,6 @@ def can_launch(self, task): # def get_launcher_env(self): -<<<<<<< HEAD - slots = t['slots'] - td = t['description'] - - task_exec = td['executable'] - task_args = td.get('arguments') or [] - task_argstr = self._create_arg_string(task_args) - task_env = td.get('environment') or {} - - n_tasks = td['cpu_processes'] -======= return ['. $RP_PILOT_SANDBOX/%s' % self._env_sh] @@ -88,7 +77,6 @@ def get_launch_cmds(self, task, exec_path): slots = task['slots'] td = task['description'] n_tasks = td['cpu_processes'] ->>>>>>> devel # Usage of env variable TACC_TASKS_PER_NODE is purely for MPI tasks, # threads are not considered (info provided by TACC support) @@ -119,13 +107,6 @@ def get_launch_cmds(self, task, exec_path): if offsets: ibrun_offset = min(offsets) -<<<<<<< HEAD - ibrun_command = "%s -n %s -o %d %s %s" % \ - (self.launch_command, n_tasks, - ibrun_offset, task_exec, task_argstr) - - return ibrun_command.strip(), None -======= cmd = '%s -n %s -o %d %s' % (self._command, n_tasks, ibrun_offset, exec_path) return cmd.rstrip() @@ -153,7 +134,6 @@ def get_rank_exec(self, task, rank_id, rank): command = '%s %s' % (task_exec, task_argstr) return command.rstrip() ->>>>>>> devel # ------------------------------------------------------------------------------ diff --git a/src/radical/pilot/agent/scheduler/base.py b/src/radical/pilot/agent/scheduler/base.py index 1ca85eed3d..45604bcb57 100644 --- a/src/radical/pilot/agent/scheduler/base.py +++ b/src/radical/pilot/agent/scheduler/base.py @@ -37,9 +37,6 @@ # SCHEDULER_NAME_SCATTERED = "SCATTERED" -CHUNKSIZE = 1024 # break sort of incoming tasks not and then to - # actually schedule them - # ------------------------------------------------------------------------------ # # An RP agent scheduler will place incoming tasks onto a set of cores and gpus. 
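The scheduler hunks below restructure the component's main loop: every scheduling phase now reports a `(resources, active)` pair, i.e. whether usable resources remain and whether the phase did any work, and the loop only idles when no phase was active. A minimal sketch of that control flow, with the three phase methods as stand-in callables rather than the real implementations:

    import time

    def schedule_loop(schedule_waitpool, schedule_incoming,
                      unschedule_completed, term):
        # `term` is a threading.Event-like flag; each phase callable
        # returns a `(resources, active)` tuple as in the hunks below

        resources = True                      # fresh start: all is free
        while not term.is_set():

            active = 0                        # work done in this iteration
            r_wait = False

            # if we have new resources, try to place waiting tasks first
            if resources:
                r_wait, a = schedule_waitpool()
                active   += int(a)

            # always try to schedule newly incoming tasks
            r_inc, a = schedule_incoming()
            active  += int(a)

            # had resources, but placed neither waiting nor incoming
            # tasks: we effectively ran out of *useful* resources
            if resources and not r_wait and not r_inc:
                resources = False

            # completed tasks free slots and may re-enable scheduling
            r, a = unschedule_completed()
            if not resources and r:
                resources = True
            active += int(a)

            if not active:
                time.sleep(0.1)               # nothing happened: idle briefly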
@@ -465,7 +462,7 @@ def slot_status(self, msg=None): ret += glyphs[gpu] ret += '|' - # self._log.debug("status: %-30s: %s", msg, ret) + self._log.debug("status: %-30s: %s", msg, ret) return ret @@ -488,8 +485,6 @@ def _refresh_ts_map(self): if not self._waitpool: return - # self._prof.prof('tsmap_start') - for uid, task in self._waitpool.items(): ts = task['tuple_size'] if ts not in self._ts_map: @@ -497,7 +492,6 @@ def _refresh_ts_map(self): self._ts_map[ts].add(uid) self._ts_valid = True - # self._prof.prof('tsmap_stop') # -------------------------------------------------------------------------- @@ -542,12 +536,7 @@ def unschedule_cb(self, topic, msg): release (for whatever reason) all slots allocated to this task ''' - if not isinstance(msg, list): - self._log.error('msg is not list: %s', msg) - msg = [msg] - self._queue_unsched.put(msg) - # self._log.debug('unscheduled from queue : %d', len(msg)) # return True to keep the cb registered return True @@ -622,39 +611,24 @@ def _schedule_tasks(self): resources = True # fresh start, all is free while not self._proc_term.is_set(): -<<<<<<< HEAD - # idle if this iteration changes no state - old_state = [resources, len(self._waitpool)] -======= self._log.debug_3('=== schedule tasks 0: %s, w: %d', resources, len(self._waitpool)) ->>>>>>> devel - # self._log.debug('schedule tasks 0: RX %s [%d]', resources, len(self._waitpool)) + active = 0 # see if we do anything in this iteration # if we have new resources, try to place waiting tasks. r_wait = False if resources: -<<<<<<< HEAD - r_wait = self._schedule_waitpool() - # self._log.debug('scheduled tasks w: RX %s %s', resources, r_wait) -======= r_wait, a = self._schedule_waitpool() active += int(a) self._log.debug_3('=== schedule tasks w: %s %s', r_wait, a) ->>>>>>> devel # always try to schedule newly incoming tasks # running out of resources for incoming could still mean we have # smaller slots for waiting tasks, so ignore `r` for now. -<<<<<<< HEAD - r_inc = self._schedule_incoming() - # self._log.debug('scheduled tasks i: RX %s %s', resources, r_inc) -======= r_inc, a = self._schedule_incoming() active += int(a) self._log.debug_3('=== schedule tasks i: %s %s', r_inc, a) ->>>>>>> devel # if we had resources, but could not schedule any incoming not any # waiting, then we effectively ran out of *useful* resources @@ -665,12 +639,9 @@ def _schedule_tasks(self): # if tasks got unscheduled (and not replaced), then we have new # space to schedule waiting tasks (unless we have resources from # before) - r = self._unschedule_completed() + r, a = self._unschedule_completed() if not resources and r: resources = True -<<<<<<< HEAD - # self._log.debug('scheduled tasks c: RX %s %s', resources, r) -======= active += int(a) self._log.debug_3('=== schedule tasks c: %s %s', r, a) @@ -678,11 +649,7 @@ def _schedule_tasks(self): time.sleep(0.1) # FIXME: configurable self._log.debug_3('=== schedule tasks x: %s %s', resources, active) ->>>>>>> devel - # idle if this iteration changes no state - if old_state != [resources, len(self._waitpool)]: - time.sleep(0.1) # -------------------------------------------------------------------------- # @@ -696,9 +663,7 @@ def _prof_sched_skip(self, task): # def _schedule_waitpool(self): - # self._log.debug("schedule waitpool %d waiting" % len(self._waitpool)) - - resources = None # default: no change to resource state + # self.slot_status("before schedule waitpool") # sort by inverse tuple size to place larger tasks first and backfill # with smaller tasks. 
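A worked example of this backfill ordering (illustrative values only): `tuple_size` is the triple `(cpu_processes, cpu_threads, gpu_processes)`, and the sort key `(cpu_processes + gpu_processes) * cpu_threads` defined just below approximates a task's slot footprint:

    tasks = [{'uid': 'task.0', 'tuple_size': (4, 2, 1)},   # (4 + 1) * 2 = 10
             {'uid': 'task.1', 'tuple_size': (1, 1, 0)},   # (1 + 0) * 1 =  1
             {'uid': 'task.2', 'tuple_size': (8, 1, 0)}]   # (8 + 0) * 1 =  8

    tasks.sort(key=lambda x: (x['tuple_size'][0] + x['tuple_size'][2])
                           * x['tuple_size'][1],
               reverse=True)

    # largest footprint first, smaller tasks backfill the remaining slots
    print([task['uid'] for task in tasks])    # ['task.0', 'task.2', 'task.1']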
We only look at cores right now - this needs
@@ -706,7 +671,6 @@
         # We define `tuple_size` as
         # `(cpu_processes + gpu_processes) * cpu_threads`
         #
-        # FIXME: cache tuple size metric
         to_wait = list()
         to_test = list()
@@ -724,7 +688,6 @@
                              (x['tuple_size'][0] + x['tuple_size'][2])
                            * x['tuple_size'][1],
                   reverse=True)

-      # self._log.debug("schedule waitpool %d", len(tasks))
         # cycle through waitpool, and see if we get anything placed now.
       # self._log.debug('=== before bisec: %d', len(to_test))
         scheduled, unscheduled, failed = ru.lazy_bisect(to_test,
@@ -734,60 +697,41 @@
       # self._log.debug('=== after bisec: %d : %d : %d', len(scheduled),
       #                 len(unscheduled), len(failed))

-      # self._log.debug("schedules waitpool %d", len(scheduled))
-      # for task in scheduled:
-      #     self._prof.prof('schedule_wait', uid=task['uid'])
+        for task, error in failed:
+            task['stderr']       = error
+            task['control']      = 'tmgr_pending'
+            task['target_state'] = 'FAILED'
+            task['$all']         = True

-        if failed:
-            for task, error in failed:
-                task['stderr']       = error
-                task['control']      = 'tmgr_pending'
-                task['target_state'] = 'FAILED'
-                task['$all']         = True
-
-                self._log.error('bisect failed on %s: %s', task['uid'], error)
-
-            self.advance(failed, rps.FAILED, publish=True, push=False)
+            self._log.error('bisect failed on %s: %s', task['uid'], error)
+        self.advance([t for t, _ in failed], rps.FAILED, publish=True, push=False)

         self._waitpool = {task['uid']: task for task in (unscheduled + to_wait)}

-        # we only need to re-create the waitpool if any tasks were scheduled
-        if scheduled:
-            self._waitpool = {task['uid']:task for task in unscheduled}
+        # update task resources
+        for task in scheduled:
+            td = task['description']
+            task['$set']      = ['resources']
+            task['resources'] = {'cpu': td['cpu_processes'] *
+                                        td.get('cpu_threads', 1),
+                                 'gpu': td['gpu_processes']}
+        self.advance(scheduled, rps.AGENT_EXECUTING_PENDING, publish=True,
+                     push=True)

-        # if unscheduled tasks remain in the waitpool, we ran out of resources
-        if unscheduled and self._waitpool:
-            if self._waitpool:
-                resources = False
+        # method counts as `active` if anything was scheduled
+        active = bool(scheduled)

-        if scheduled:
-            self._log.debug('scheduled from waitpool: %d', len(scheduled))
-
-            # update task resources
-            for task in scheduled:
-                td = task['description']
-                task['$set']      = ['resources']
-                task['resources'] = {'cpu': td['cpu_processes'] *
-                                            td.get('cpu_threads', 1),
-                                     'gpu': td['gpu_processes']}
-            self.advance(scheduled, rps.AGENT_EXECUTING_PENDING, publish=True,
-                         push=True)
+        # if unscheduled tasks remain in the waitpool, we ran out of resources
+        resources = not bool(unscheduled)

-      # self._log.debug("after schedule waitpool %s: %d waiting",
-      #                 resources, len(self._waitpool))
-        return resources
+      # self.slot_status("after schedule waitpool")
+        return resources, active


    # --------------------------------------------------------------------------
    #
    def _schedule_incoming(self):

-<<<<<<< HEAD
-      # self._log.debug("before schedule incoming: waiting: %d",
-      #                 len(self._waitpool))
-
-        resources = None   # no change in resource status
-=======
        # fetch all tasks from the queue
        to_schedule = list()              # some tasks get scheduled here
        to_raptor   = dict()              # some tasks get forwarded to raptor

        try:
            while not self._proc_term.is_set():
                data = self._queue_sched.get(timeout=0.001)
->>>>>>> devel

-        # we try to schedule incoming tasks as long as they are incoming.
The - # tasks do also get sorted by size (just like we do for the waitpool), - # but that happens only in chunks of `CHUNKSIZE` - while True: + if not isinstance(data, list): + data = [data] -<<<<<<< HEAD - # fetch all tasks from the queue - tasks = list() - try: -======= for task in data: # check if this task is to be scheduled by sub-schedulers # like raptor @@ -822,80 +758,11 @@ def _schedule_incoming(self): # no raptor - schedule it here self._set_tuple_size(task) to_schedule.append(task) ->>>>>>> devel - - while not self._proc_term.is_set(): - data = self._queue_sched.get(timeout=0.001) - -<<<<<<< HEAD - for task in data: - self._set_tuple_size(task) - tasks.append(task) - - if len(tasks) > CHUNKSIZE: - # stop collecting, schedule what we have, only continue - # here when all tasks can be scheduled - # self._log.debug('break for chunk %d', CHUNKSIZE) - break - - except queue.Empty: - # no more unschedule requests - pass - - if not tasks: - # self._log.debug('return for empty queue') - # no resource change, no activity - return None - - self._log.debug("schedule incoming [%d]", len(tasks)) - - - # handle largest tasks first - # FIXME: this needs lazy-bisect - to_schedule = list() - named_envs = dict() - for task in tasks: - - # FIXME: This is a slow and inefficient way to wait for named VEs. - # The semantics should move to the upcoming eligibility - # checker - # FIXME: Note that this code is duplicated in _schedule_waitpool - named_env = task['description'].get('named_env') - if not named_env: - to_schedule.append(task) - - else: - if not named_envs.get(named_env): - # [re]check env: (1) first time check; (2) was not set yet - named_envs[named_env] = os.path.exists('%s.ok' % named_env) - - if named_envs[named_env]: - to_schedule.append(task) - - else: - # put delayed task into the waitpool - self._waitpool[task['uid']] = task - - self._log.debug('delay %s, no env %s', - task['uid'], named_env) - - - to_schedule.sort(key=lambda x: x['tuple_size'][0], reverse=True) - scheduled, unscheduled, failed = ru.lazy_bisect(to_schedule, - check=self._try_allocation, - on_skip=self._prof_sched_skip, - log=self._log) - self._log.debug('unscheduled incoming: %d', len(scheduled)) - self._log.debug('scheduled incoming: %d', len(unscheduled)) - - - if failed: - self.advance(failed, rps.FAILED, publish=True, push=False) + except queue.Empty: + # no more unschedule requests + pass - if scheduled: - for task in scheduled: -======= # forward raptor tasks to their designated raptor if to_raptor: @@ -940,51 +807,57 @@ def _schedule_incoming(self): self._log.debug('delay %s, no env %s', task['uid'], named_env) continue ->>>>>>> devel + # either we can place the task straight away, or we have to + # put it in the wait pool. + try: + if self._try_allocation(task): # task got scheduled - advance state, notify world about the # state change, and push it out toward the next component. 
- self._prof.prof('schedule_first', uid=task['uid']) td = task['description'] task['$set'] = ['resources'] task['resources'] = {'cpu': td['cpu_processes'] * td.get('cpu_threads', 1), 'gpu': td['gpu_processes']} + self.advance(task, rps.AGENT_EXECUTING_PENDING, + publish=True, push=True) + + else: + to_wait.append(task) - self.advance(scheduled, rps.AGENT_EXECUTING_PENDING, - publish=True, push=True) + except Exception as e: + task['stderr'] = str(e) + task['control'] = 'tmgr_pending' + task['target_state'] = 'FAILED' + task['$all'] = True - # all tasks which could not be scheduled are added to the waitpool - if unscheduled: - self._waitpool.update({task['uid']:task for task in unscheduled}) + self._log.exception('scheduling failed for %s', task['uid']) - # incoming tasks which have to wait are the only reason to - # rebuild the tuple_size map. - self._ts_valid = False + self.advance(task, rps.FAILED, publish=True, push=False) - # if tasks remain waiting, we are out of usable resources - resources = False - # if we could not schedule any task from the last chunk, then we - # should break to allow the unschedule to kick in - # NOTE: new incoming tasks *may* have a chance to get scheduled, - # so this is a lucky(?) guess - break + # all tasks which could not be scheduled are added to the waitpool + self._waitpool.update({task['uid']: task for task in to_wait}) + + # we performed some activity (worked on tasks) + active = True + # if tasks remain waiting, we are out of usable resources + resources = not bool(to_wait) - self._log.debug("after schedule incoming: waiting: %d", - len(self._waitpool)) + # incoming tasks which have to wait are the only reason to rebuild the + # tuple_size map + self._ts_valid = False - return resources + # self.slot_status("after schedule incoming") + return resources, active # -------------------------------------------------------------------------- # def _unschedule_completed(self): - # self._log.debug("unschedule completed") - to_unschedule = list() try: @@ -1001,34 +874,23 @@ def _unschedule_completed(self): # in a max added latency of about 0.1 second, which is one order of # magnitude above our noise level again and thus acceptable (tm). 
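The drain loop that follows implements a common micro-batching pattern: each `get` blocks only briefly, and the batch size is capped, so completion events are processed in bulk at a small, bounded latency cost. As a standalone sketch over a `queue.Queue`-like channel (names and defaults are illustrative):

    import queue

    def drain(q, cap=512, timeout=0.01):
        # collect up to `cap` items; each `get` waits at most `timeout`
        # seconds, so an idle queue adds only a small, bounded latency
        batch = list()
        try:
            while len(batch) < cap:
                batch.append(q.get(timeout=timeout))
        except queue.Empty:
            pass                          # no more items right now
        return batch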
while not self._proc_term.is_set(): -<<<<<<< HEAD - tasks = self._queue_unsched.get(timeout=0.001) - to_unschedule.extend(tasks) - # self._log.debug('unscheduled to batch : %d', len(tasks)) - if len(to_unschedule) > CHUNKSIZE: -======= task = self._queue_unsched.get(timeout=0.01) to_unschedule.append(task) if len(to_unschedule) > 512: ->>>>>>> devel break except queue.Empty: # no more unschedule requests pass - # self._log.debug("unschedule completed %d", len(to_unschedule)) + to_release = list() # slots of unscheduling tasks + placed = list() # uids of waiting tasks replacing unscheduled ones + if to_unschedule: # rebuild the tuple_size binning, maybe - self._log.debug('unscheduled refresh : %d', len(to_unschedule)) self._refresh_ts_map() - self._log.debug('unscheduled refreshed : %d', len(to_unschedule)) - - to_release = list() # unscheduling tasks to release slots from - placed = list() # uids of tasks replacing unscheduled ones - to_advance = list() # tasks to move to the scheduler for task in to_unschedule: # if we find a waiting task with the same tuple size, we don't free @@ -1064,80 +926,28 @@ def _unschedule_completed(self): self._active_cnt -= 1 to_release.append(task) - if not to_release: if not to_unschedule: # no new resources, not been active return False, False else: - - # cycle through the matching ts-candidates. Some - # may be invalid by now, having been scheduled via - # `schedule_waitlist`, but if we find any, break the - # search and swap the slots. - replace = None - while not replace: - - # stop search on emptied candidate list - if not self._ts_map[ts]: - del(self._ts_map[ts]) - break - - candidate = self._ts_map[ts].pop() - replace = self._waitpool.get(candidate) - - if not replace: - - # no replacement task found: free the slots, and try to - # schedule other tasks of other sizes. - to_release.append(task) - - else: - - # found one - swap the slots and push out to executor - replace['slots'] = task['slots'] - placed.append(replace['uid']) - - # unschedule task A and schedule task B have the same - # timestamp - ts = time.time() - self._prof.prof('unschedule_stop', uid=task['uid'], ts=ts) - self._prof.prof('schedule_fast', uid=replace['uid'], ts=ts) - self._prof.prof('schedule_ok', uid=replace['uid'], ts=ts) - to_advance.append(replace) - - if to_advance: - self._log.debug('unscheduled advance : %d', len(to_advance)) - self.advance(to_advance, rps.AGENT_EXECUTING_PENDING, - publish=True, push=True) - + # no new resources, but activity + return False, True # we have tasks to unschedule, which will free some resources. We can # thus try to schedule larger tasks again, and also inform the caller # about resource availability. 
- # self._log.debug("release completed %d", len(to_release)) - if to_release: - for task in to_release: - self.unschedule_task(task) - self._prof.prof('unschedule_stop', uid=task['uid']) - - self._log.debug('unscheduled release : %d', len(to_release)) + for task in to_release: + self.unschedule_task(task) + self._prof.prof('unschedule_stop', uid=task['uid']) # we placed some previously waiting tasks, and need to remove those from # the waitpool self._waitpool = {task['uid']: task for task in self._waitpool.values() if task['uid'] not in placed} - # if previously waiting tasks were placed, remove them from the waitpool - # self._log.debug("scheduled completed %d", len(placed)) - if placed: - for uid in placed: - del(self._waitpool[uid]) - - # self._log.debug("unscheduled and replaced : %d / %d", len(to_unschedule), len(placed)) - - if to_release: return True # new resources - else : return False + # we have new resources, and were active + return True, True # -------------------------------------------------------------------------- @@ -1181,118 +991,6 @@ def _try_allocation(self, task): # -------------------------------------------------------------------------- # -<<<<<<< HEAD - def _handle_cuda(self, task): - - return - - # Check if task requires GPUs. If so, set CUDA_VISIBLE_DEVICES to the - # list of assigned GPU IDs. We only handle uniform GPU setting for - # now, and will isse a warning on non-uniform ones. - # - # The default setting is `` - # - # FIXME: This code should probably live elsewhere, not in this - # performance critical scheduler base class - # - # FIXME: The specification for `CUDA_VISIBLE_DEVICES` is actually Launch - # Method dependent. Assume the scheduler assigns the second GPU. - # Manually, one would set `CVD=1`. That also holds for launch - # methods like `fork` which leave GPU indexes unaltered. Other - # launch methods like `jsrun` mask the system GPUs and only the - # second GPU is visible to the task. To CUDA the system now - # seems to have only one GPU, and we need set it to `CVD=0`. - # - # In other words, CVD sometimes needs to be set to the physical - # GPU IDs, and at other times to the logical GPU IDs (IDs as - # visible to the task). This also implies that this code should - # actually live within the launch method. On the upside, the - # Launch Method should also be able to handle heterogeneus tasks. - # - # For now, we default the CVD ID mode to `physical`, thus - # assuming that unassigned GPUs are not masked away, as for - # example with `fork` and 'prte'. - - lm_info = self._cfg['rm_info']['lm_info'] - cvd_id_mode = lm_info.get('cvd_id_mode', 'physical') - - gpu_maps = list() - for node in task['slots']['nodes']: - if node['gpu_map'] not in gpu_maps: - gpu_maps.append(node['gpu_map']) - - if not gpu_maps or not gpu_maps[0]: - # no gpu maps, nothing to do - pass - - elif len(gpu_maps) > 1: - # FIXME: this does not actually check for uniformity - self._log.warn('cannot set CUDA_VISIBLE_DEVICES for non-uniform' - 'GPU schedule (%s) - task may fail!' 
% gpu_maps) - - else: - # uniform, non-zero gpu map - gpu_map = gpu_maps[0] - - if cvd_id_mode == 'physical': - task['description']['environment']['CUDA_VISIBLE_DEVICES'] = \ - ','.join([str(gpu_set[0]) for gpu_set in gpu_map]) - - elif cvd_id_mode == 'logical': - task['description']['environment']['CUDA_VISIBLE_DEVICES'] = \ - ','.join([str(x) for x in range(len(gpu_map))]) - - else: - raise ValueError('invalid CVD mode %s' % cvd_id_mode) - - - # -------------------------------------------------------------------------- - # - def _get_node_maps(self, cores, gpus, threads_per_proc): - ''' - For a given set of cores and gpus, chunk them into sub-sets so that each - sub-set can host one application process and all threads of that - process. Note that we currently consider all GPU applications to be - single-threaded. - - example: - cores : [1, 2, 3, 4, 5, 6, 7, 8] - gpus : [1, 2] - tpp : 4 - result : [[1, 2, 3, 4], [5, 6, 7, 8]], [[1], [2]] - - For more details, see top level comment of `base.py`. - ''' - - core_map = list() - gpu_map = list() - - # make sure the core sets can host the requested number of threads - assert(not len(cores) % threads_per_proc) - n_procs = int(len(cores) / threads_per_proc) - - idx = 0 - for _ in range(n_procs): - p_map = list() - for _ in range(threads_per_proc): - p_map.append(cores[idx]) - idx += 1 - core_map.append(p_map) - - assert(idx == len(cores)), \ - ('%s -- %s -- %s -- %s' % idx, len(cores), cores, n_procs) - - # gpu procs are considered single threaded right now (FIXME) - for g in gpus: - gpu_map.append([g]) - - return core_map, gpu_map - - - # -------------------------------------------------------------------------- - # -======= ->>>>>>> devel def _set_tuple_size(self, task): ''' Scheduling, in very general terms, maps resource request to available diff --git a/src/radical/pilot/agent/scheduler/continuous.py b/src/radical/pilot/agent/scheduler/continuous.py index bcab1eb947..38b54549ac 100644 --- a/src/radical/pilot/agent/scheduler/continuous.py +++ b/src/radical/pilot/agent/scheduler/continuous.py @@ -355,13 +355,8 @@ def schedule_task(self, task): # start the search for node in self._iterate_nodes(): -<<<<<<< HEAD - node_uid = node['uid'] - node_name = node['name'] -======= node_id = node['node_id'] node_name = node['node_name'] ->>>>>>> devel self._log.debug_3('next %s : %s', node_id, node_name) self._log.debug_3('req1: %s = %s + %s', req_slots, rem_slots, diff --git a/src/radical/pilot/configs/agent_default.json b/src/radical/pilot/configs/agent_default.json index 1dc0632e37..361824ffb8 100644 --- a/src/radical/pilot/configs/agent_default.json +++ b/src/radical/pilot/configs/agent_default.json @@ -28,24 +28,10 @@ "agent_executing_queue" : {"kind": "queue"}, "agent_staging_output_queue" : {"kind": "queue"}, -<<<<<<< HEAD "funcs_req_queue" : {"kind": "queue"}, "funcs_res_queue" : {"kind": "queue"}, -======= - "raptor_scheduling_queue" : { "kind" : "queue", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 1}, - "funcs_req_queue" : { "kind" : "queue", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 1}, - "funcs_res_queue" : { "kind" : "queue", - "log_level" : "error", - "stall_hwm" : 0, - "bulk_size" : 1}, ->>>>>>> devel + "raptor_scheduling_queue" : {"kind": "queue"}, "agent_unschedule_pubsub" : {"kind": "pubsub"}, "agent_schedule_pubsub" : {"kind": "pubsub"}, diff --git a/src/radical/pilot/configs/resource_debug.json b/src/radical/pilot/configs/resource_debug.json index 310ea8d81a..738de17ccc 100644 --- 
a/src/radical/pilot/configs/resource_debug.json +++ b/src/radical/pilot/configs/resource_debug.json @@ -11,24 +11,14 @@ }, "default_queue" : "", "resource_manager" : "FORK", -<<<<<<< HEAD - "lfs_per_node" : "/tmp", - "agent_config" : "debug_sa", - "agent_scheduler" : "CONTINUOUS", - "agent_spawner" : "SLEEP", - "agent_launch_method" : "FORK", - "task_launch_method" : "NOOP", - "mpi_launch_method" : "NOOP", -======= "agent_config" : "default_sa", "agent_scheduler" : "CONTINUOUS", - "agent_spawner" : "POPEN", + "agent_spawner" : "SLEEP", "launch_methods" : { "order" : ["FORK", "MPIRUN"], "FORK" : {}, "MPIRUN": {} }, ->>>>>>> devel "pre_bootstrap_0" : [], "pre_bootstrap_1" : [], "default_remote_workdir" : "$HOME/", @@ -73,12 +63,6 @@ "stage_cacerts" : true, "python_dist" : "default", "virtenv_dist" : "system", -<<<<<<< HEAD - "cores_per_node" : 42, - "gpus_per_node" : 6, - "lfs_per_node" : {"path" : "/tmp", - "size" : 100} -======= "cores_per_node" : 42, "gpus_per_node" : 6, "lfs_path_per_node" : "/tmp", @@ -203,7 +187,6 @@ "cores_per_node" : 8, "gpus_per_node" : 1, "fake_resources" : true ->>>>>>> devel } } diff --git a/src/radical/pilot/pilot.py b/src/radical/pilot/pilot.py index 8d838dfbc7..a4fb0f281c 100644 --- a/src/radical/pilot/pilot.py +++ b/src/radical/pilot/pilot.py @@ -652,7 +652,6 @@ def stage_in(self, sds): # -------------------------------------------------------------------------- # -<<<<<<< HEAD def rpc(self, rpc, args): ''' Send a pilot command, wait for the response, and return the result. @@ -668,8 +667,6 @@ def rpc(self, rpc, args): # -------------------------------------------------------------------------- # -======= ->>>>>>> devel def stage_out(self, sds=None): ''' Fetch files (default:`staging_output.tgz`) from the pilot sandbox. diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py index 2e6e82cadd..c91db88b5e 100644 --- a/src/radical/pilot/pilot_manager.py +++ b/src/radical/pilot/pilot_manager.py @@ -402,20 +402,6 @@ def _pilot_send_hb(self, pid=None): # -------------------------------------------------------------------------- # -<<<<<<< HEAD - def _pilot_prepare_env(self, pid, env_spec): - - if not env_spec: - return - - # FIXME: MongoDB - # self._session._dbs.pilot_command('prep_env', env_spec, [pid]) - - - # -------------------------------------------------------------------------- - # -======= ->>>>>>> devel def _pilot_staging_input(self, sds): ''' Run some staging directives for a pilot. @@ -618,18 +604,8 @@ def submit_pilots(self, descriptions): # only trigger the profile entry for NEW. self.advance(pilot_docs, state=rps.NEW, publish=False, push=False) -<<<<<<< HEAD # immediately send first heartbeat and any other commands which are # included in the pilot description -======= - if self._session._rec: - self._rec_id += 1 - - # insert pilots into the database, as a bulk. - self._session._dbs.insert_pilots(pilot_docs) - - # immediately send first heartbeat ->>>>>>> devel for pilot_doc in pilot_docs: pid = pilot_doc['uid'] self._pilot_send_hb(pid) diff --git a/src/radical/pilot/pmgr/launching/default.py b/src/radical/pilot/pmgr/launching/default.py index 0a97de4d83..54e5b07801 100644 --- a/src/radical/pilot/pmgr/launching/default.py +++ b/src/radical/pilot/pmgr/launching/default.py @@ -532,10 +532,6 @@ def _start_pilot_bulk(self, resource, schema, pilots): # direct staging, use first pilot for staging context # NOTE: this implies that the SDS can only refer to session # sandboxes, not to pilot sandboxes! 
-<<<<<<< HEAD - self._log.debug(info['sds']) -======= ->>>>>>> devel self._stage_in(pilots[0], info['sds']) for ft in ft_list: @@ -686,16 +682,9 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): # ---------------------------------------------------------------------- # get parameters from resource cfg, set defaults where needed -<<<<<<< HEAD - agent_launch_method = rcfg.get('agent_launch_method') - agent_service_url = rcfg.get('agent_service_url', service_url) - agent_spawner = rcfg.get('agent_spawner', DEFAULT_AGENT_SPAWNER) - agent_config = rcfg.get('agent_config', DEFAULT_AGENT_CONFIG) -======= - agent_dburl = rcfg.get('agent_mongodb_endpoint', database_url) + agent_service_url = rcfg.get('agent_service_url', service_url) agent_spawner = rcfg.get('agent_spawner', DEFAULT_AGENT_SPAWNER) agent_config = rcfg.get('agent_config', DEFAULT_AGENT_CONFIG) ->>>>>>> devel agent_scheduler = rcfg.get('agent_scheduler') tunnel_bind_device = rcfg.get('tunnel_bind_device') default_queue = rcfg.get('default_queue') @@ -873,22 +862,12 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): # ---------------------------------------------------------------------- # sanity checks -<<<<<<< HEAD RE = RuntimeError if not python_dist : raise RE("missing python distribution") if not virtenv_dist : raise RE("missing virtualenv distribution") if not agent_spawner : raise RE("missing agent spawner") if not agent_scheduler : raise RE("missing agent scheduler") if not resource_manager : raise RE("missing resource manager") - if not agent_launch_method: raise RE("missing agentlaunch method") - if not task_launch_method : raise RE("missing task launch method") -======= - if not python_dist : raise RuntimeError("missing python distribution") - if not virtenv_dist : raise RuntimeError("missing virtualenv distribution") - if not agent_spawner : raise RuntimeError("missing agent spawner") - if not agent_scheduler : raise RuntimeError("missing agent scheduler") - if not resource_manager: raise RuntimeError("missing resource manager") ->>>>>>> devel # massage some values if not queue: From 06c70733be22e656998792fd3254329bab23ca8a Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 8 Dec 2021 09:57:33 +0100 Subject: [PATCH 019/171] progress snapshot --- bin/radical-pilot-proxy-server | 3 + examples/00_getting_started.py | 6 +- setup.py | 2 +- src/radical/pilot/agent/agent_0.py | 19 ++--- src/radical/pilot/agent/scheduler/base.py | 4 +- .../pilot/agent/scheduler/continuous.py | 8 ++- src/radical/pilot/configs/agent_default.json | 26 +++---- .../pilot/configs/agent_default_sa.json | 2 +- src/radical/pilot/configs/agent_scale.json | 2 +- src/radical/pilot/pilot.py | 12 +++- src/radical/pilot/pilot_manager.py | 11 +++ src/radical/pilot/session.py | 72 ++++++++++++------- src/radical/pilot/task_manager.py | 19 +++++ src/radical/pilot/utils/component.py | 20 +++--- 14 files changed, 138 insertions(+), 68 deletions(-) diff --git a/bin/radical-pilot-proxy-server b/bin/radical-pilot-proxy-server index 017e352f89..1e46ede4ed 100755 --- a/bin/radical-pilot-proxy-server +++ b/bin/radical-pilot-proxy-server @@ -239,16 +239,19 @@ class ZMQBridge(ru.zmq.Server): proxy_cp = ru.zmq.PubSub(cfg={'channel': 'proxy_control_pubsub', 'uid' : 'proxy_control_pubsub', 'type' : 'pubsub', + 'log_lvl': 'debug', 'path' : sid}) proxy_sp = ru.zmq.PubSub(cfg={'channel': 'proxy_state_pubsub', 'uid' : 'proxy_state_pubsub', 'type' : 'pubsub', + 'log_lvl': 'debug', 'path' : sid}) proxy_aq = ru.zmq.Queue (cfg={'channel': 
'proxy_task_queue', 'uid' : 'proxy_task_queue', 'type' : 'queue', + 'log_lvl': 'debug', 'path' : sid}) proxy_cp.start() diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index 5eb12190f7..d95cb99669 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -4,6 +4,7 @@ __license__ = 'MIT' import os +from re import A import sys import random @@ -58,16 +59,15 @@ 'exit_on_error' : True, 'project' : config.get('project', None), 'queue' : config.get('queue', None), - 'access_schema' : config.get('schema', None), 'cores' : config.get('cores', None), + 'cores' : 1024, 'gpus' : config.get('gpus', 0), } pdesc = rp.PilotDescription(pd_init) - # Launch the pilot. pilot = pmgr.submit_pilots(pdesc) - n = 1024 * 1024 # number of tasks to run + n = 1 # 024 * 1024 # number of tasks to run report.header('submit %d tasks' % n) # Register the pilot in a TaskManager object. diff --git a/setup.py b/setup.py index 3b16756936..5e6ea34cdd 100755 --- a/setup.py +++ b/setup.py @@ -220,7 +220,7 @@ def run(self): 'package_dir' : {'': 'src'}, 'scripts' : [ 'bin/radical-pilot-agent', - 'bin/radical-pilot-agent-bridge', + # 'bin/radical-pilot-agent-bridge', 'bin/radical-pilot-agent-funcs', 'bin/radical-pilot-agent-statepush', 'bin/radical-pilot-bridge', diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index b40ac56a8d..531bdc2cc8 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -170,7 +170,8 @@ def _connect_proxy(self): # listen for completed tasks to foward to client self.register_input(rps.TMGR_STAGING_OUTPUT_PENDING, rpc.AGENT_COLLECTING_QUEUE, - self._proxy_output_cb) + qname='default', + cb=self._proxy_output_cb) # and register output self.register_output(rps.TMGR_STAGING_OUTPUT_PENDING, @@ -183,7 +184,7 @@ def _connect_proxy(self): # def _proxy_input_cb(self, msg): - self._log.debug('=== proxy input cb: %s', len(msg)) + self._log.debug('proxy input cb: %s', len(msg)) to_advance = list() @@ -227,7 +228,7 @@ def _proxy_output_cb(self, msg): # def _client_ctrl_cb(self, topic, msg): - self._log.debug('=== ctl sub cb: %s %s', topic, msg) + self._log.debug('ctl sub cb: %s %s', topic, msg) # -------------------------------------------------------------------------- @@ -284,7 +285,7 @@ def initialize(self): # sub-agents are started, components are started, bridges are up: we are # ready to roll! 
Send state update - rm_info = self._rm.rm_info + rm_info = self._rm.info n_nodes = len(rm_info['node_list']) pilot = {'type' : 'pilot', @@ -367,7 +368,7 @@ def finalize(self): 'logfile': log, 'state' : state} - self._log.debug('=== push final state update') + self._log.debug('push final state update') self._log.debug('update state: %s: %s', state, self._final_cause) self.publish(rpc.PROXY_STATE_PUBSUB, topic=rpc.STATE_PUBSUB, msg=[pilot]) @@ -628,7 +629,7 @@ def _proxy_state_cb(self, topic, msg): # def _proxy_control_cb(self, topic, msg): - self._log.debug('=== proxy control: %s', msg) + self._log.debug('proxy control: %s', msg) cmd = msg['cmd'] arg = msg['arg'] @@ -656,10 +657,10 @@ def _proxy_control_cb(self, topic, msg): if cmd == 'cancel_pilots': if self._pid not in arg.get('uids'): - self._log.debug('=== ignore cancel %s', msg) + self._log.debug('ignore cancel %s', msg) return True - self._log.info('=== cancel pilot cmd') + self._log.info('cancel pilot cmd') self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'terminate', 'arg' : None}) self._final_cause = 'cancel' @@ -670,7 +671,7 @@ def _proxy_control_cb(self, topic, msg): # all other messages (such as cancel_tasks) are forwarded to the agent # control pubsub, to be picked up by the respective target components - self._log.debug('=== fwd control msg %s', msg) + self._log.debug('fwd control msg %s', msg) self.publish(rpc.CONTROL_PUBSUB, msg) return True diff --git a/src/radical/pilot/agent/scheduler/base.py b/src/radical/pilot/agent/scheduler/base.py index 45604bcb57..36d4c0bcb0 100644 --- a/src/radical/pilot/agent/scheduler/base.py +++ b/src/radical/pilot/agent/scheduler/base.py @@ -874,8 +874,8 @@ def _unschedule_completed(self): # in a max added latency of about 0.1 second, which is one order of # magnitude above our noise level again and thus acceptable (tm). while not self._proc_term.is_set(): - task = self._queue_unsched.get(timeout=0.01) - to_unschedule.append(task) + tasks = self._queue_unsched.get(timeout=0.01) + to_unschedule += ru.as_list(tasks) if len(to_unschedule) > 512: break diff --git a/src/radical/pilot/agent/scheduler/continuous.py b/src/radical/pilot/agent/scheduler/continuous.py index 38b54549ac..5be4954180 100644 --- a/src/radical/pilot/agent/scheduler/continuous.py +++ b/src/radical/pilot/agent/scheduler/continuous.py @@ -5,7 +5,8 @@ import pprint import math as m -import pprint + +import radical.utils as ru from ... import constants as rpc from .base import AgentSchedulingComponent @@ -115,7 +116,7 @@ def _iterate_nodes(self): # -------------------------------------------------------------------------- # - def unschedule_task(self, task): + def unschedule_task(self, tasks): ''' This method is called when previously aquired resources are not needed anymore. `slots` are the resource slots as previously returned by @@ -123,7 +124,8 @@ def unschedule_task(self, task): ''' # reflect the request in the nodelist state (set to `FREE`) - self._change_slot_states(task['slots'], rpc.FREE) + for task in ru.as_list(tasks): + self._change_slot_states(task['slots'], rpc.FREE) # -------------------------------------------------------------------------- diff --git a/src/radical/pilot/configs/agent_default.json b/src/radical/pilot/configs/agent_default.json index 361824ffb8..9a785420b3 100644 --- a/src/radical/pilot/configs/agent_default.json +++ b/src/radical/pilot/configs/agent_default.json @@ -23,22 +23,24 @@ # stall_hwm and batch_size is 1 (no stalling, no bulking). 
# "bridges" : { - "agent_staging_input_queue" : {"kind": "queue"}, - "agent_scheduling_queue" : {"kind": "queue"}, - "agent_executing_queue" : {"kind": "queue"}, - "agent_staging_output_queue" : {"kind": "queue"}, + "agent_staging_input_queue" : {"kind": "queue", "log_lvl":"debug"}, + "agent_scheduling_queue" : {"kind": "queue", "log_lvl":"debug"}, + "agent_executing_queue" : {"kind": "queue", "log_lvl":"debug"}, + "agent_staging_output_queue" : {"kind": "queue", "log_lvl":"debug"}, + "agent_collecting_queue" : {"kind": "queue", "log_lvl":"debug"}, - "funcs_req_queue" : {"kind": "queue"}, - "funcs_res_queue" : {"kind": "queue"}, + "funcs_req_queue" : {"kind": "queue", "log_lvl":"debug"}, + "funcs_res_queue" : {"kind": "queue", "log_lvl":"debug"}, - "raptor_scheduling_queue" : {"kind": "queue"}, + "raptor_scheduling_queue" : {"kind": "queue", "log_lvl":"debug"}, - "agent_unschedule_pubsub" : {"kind": "pubsub"}, - "agent_schedule_pubsub" : {"kind": "pubsub"}, + "agent_unschedule_pubsub" : {"kind": "pubsub", "log_lvl":"debug"}, + "agent_schedule_pubsub" : {"kind": "pubsub", "log_lvl":"debug"}, - "control_pubsub" : {"kind": "pubsub"}, - "state_pubsub" : {"kind": "pubsub"} - # "log_pubsub" : {"kind": "pubsub"} + "control_pubsub" : {"kind": "pubsub", "log_lvl":"debug"}, + "state_pubsub" : {"kind": "pubsub", "log_lvl":"debug"} + + # "log_pubsub" : {"kind": "pubsub", "log_lvl":"debug"} }, "components" : { diff --git a/src/radical/pilot/configs/agent_default_sa.json b/src/radical/pilot/configs/agent_default_sa.json index d44bc7d607..b5229075a9 100644 --- a/src/radical/pilot/configs/agent_default_sa.json +++ b/src/radical/pilot/configs/agent_default_sa.json @@ -65,7 +65,7 @@ "components" : { # the update worker must live in agent.0, since only that agent is # sure to have connectivity toward the DB. - "update" : {"count" : 1}, + # "update" : {"count" : 1}, "agent_staging_input" : {"count" : 1}, "agent_scheduling" : {"count" : 1}, # "agent_executing" : {"count" : 1}, diff --git a/src/radical/pilot/configs/agent_scale.json b/src/radical/pilot/configs/agent_scale.json index b978a2cda7..423df85daa 100644 --- a/src/radical/pilot/configs/agent_scale.json +++ b/src/radical/pilot/configs/agent_scale.json @@ -65,7 +65,7 @@ "components" : { # the update worker must live in agent.0, since only that agent is # sure to have connectivity toward the DB. 
- "update" : {"count" : 1}, + # "update" : {"count" : 1}, "agent_staging_input" : {"count" : 1}, "agent_scheduling" : {"count" : 1}, "agent_staging_output" : {"count" : 1} diff --git a/src/radical/pilot/pilot.py b/src/radical/pilot/pilot.py index a4fb0f281c..143603755f 100644 --- a/src/radical/pilot/pilot.py +++ b/src/radical/pilot/pilot.py @@ -160,12 +160,22 @@ def __str__(self): # def _default_state_cb(self, pilot, state=None): - uid = self.uid state = self.state self._log.info("[Callback]: pilot %s state: %s.", uid, state) + if state in rps.FINAL: + # dump json + json = self.as_dict() + # json['_id'] = self.uid + json['type'] = 'pilot' + json['uid'] = self.uid + + tgt = '%s/%s.json' % (self._session.path, self.uid) + ru.write_json(json, tgt) + + if state == rps.FAILED and self._exit_on_error: self._log.error("[Callback]: pilot '%s' failed (exit)", uid) diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py index c91db88b5e..1b324c3d01 100644 --- a/src/radical/pilot/pilot_manager.py +++ b/src/radical/pilot/pilot_manager.py @@ -228,6 +228,17 @@ def close(self, terminate=True): self._rep.ok('>>ok\n') + # dump json + json = self.as_dict() + # json['_id'] = self.uid + json['type'] = 'pmgr' + json['uid'] = self.uid + + tgt = '%s/%s.json' % (self._session.path, self.uid) + ru.write_json(json, tgt) + + + # -------------------------------------------------------------------------- # def as_dict(self): diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 8507a7e5a1..89ff583a9a 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -5,6 +5,7 @@ import os import copy +import glob import time import threading as mt @@ -129,11 +130,6 @@ def __init__(self, service_url=None, uid=None, cfg=None, _primary=True, self._log = self._get_logger (name=self._uid, level=self._cfg.get('debug')) - from . 
import version_detail as rp_version_detail - self._log.info('radical.pilot version: %s' % rp_version_detail) - self._log.info('radical.saga version: %s' % rs.version_detail) - self._log.info('radical.utils version: %s' % ru.version_detail) - self._prof.prof('session_start', uid=self._uid) # now we have config and uid - initialize base class (saga session) @@ -177,17 +173,12 @@ def __init__(self, service_url=None, uid=None, cfg=None, _primary=True, self._service = ru.zmq.Client(url=self._cfg.service_url) response = self._service.request('client_lookup', {'sid': self._uid}) - if response.err: - for line in response.exc: - self._log.error(line) - raise RuntimeError('request failed: %s' % response.err) - - self._cfg.proxy = response.res - + self._cfg.proxy = response self._log.debug('=== %s: %s', self._primary, self._cfg.proxy) # for mostly debug purposes, dump the used session config + ru.write_json(self._cfg, '%s/%s.cfg' % (self._cfg.path, self._uid)) # at this point we have a bridge connection, logger, etc, and are done @@ -268,11 +259,10 @@ def close(self, **kwargs): self._closed = True - # after all is said and done, we attempt to download the pilot log- and # profiles, if so wanted if options.download: -) + self._prof.prof("session_fetch_start", uid=self._uid) self._log.debug('start download') tgt = os.getcwd() @@ -289,6 +279,33 @@ def close(self, **kwargs): % (self._t_stop - self._t_start)) self._rep.ok('>>ok\n') + # dump json + json = {'session' : self.as_dict(), + 'pmgr' : list(), + 'pilot' : list(), + 'tmgr' : list(), + 'task' : list()} + + # json['session']['_id'] = self.uid + json['session']['type'] = 'session' + json['session']['uid'] = self.uid + json['session']['metadata'] = self._metadata + + for fname in glob.glob('%s/pmgr.*.json' % self.path): + json['pmgr'].append(ru.read_json(fname)) + + for fname in glob.glob('%s/pilot.*.json' % self.path): + json['pilot'].append(ru.read_json(fname)) + + for fname in glob.glob('%s/tmgr.*.json' % self.path): + json['tmgr'].append(ru.read_json(fname)) + + for fname in glob.glob('%s/tasks.*.json' % self.path): + json['task'] += ru.read_json(fname) + + tgt = '%s/%s.json' % (self.path, self.uid) + ru.write_json(json, tgt) + # -------------------------------------------------------------------------- # @@ -304,12 +321,8 @@ def _connect_proxy(self): self._service = ru.zmq.Client(url=self._cfg.service_url) response = self._service.request('client_register', {'sid': self._uid}) - if response.err: - for line in response.exc: - self._log.error(line) - raise RuntimeError('request failed: %s' % response.err) - self._cfg.proxy = response.res + self._cfg.proxy = response self._log.debug('=== %s: %s', self._primary, self._cfg.proxy) # now that the proxy bridges have been created on the service host, @@ -330,12 +343,17 @@ def _connect_proxy(self): # make sure we send heartbeats to the proxy self._run_proxy_hb() + from . 
import version_detail as rp_version_detail + self._log.info('radical.pilot version: %s' % rp_version_detail) + self._log.info('radical.saga version: %s' % rs.version_detail) + self._log.info('radical.utils version: %s' % ru.version_detail) + # FIXME MONGODB: to json - # self.inject_metadata({'radical_stack': - # {'rp': rp_version_detail, - # 'rs': rs.version_detail, - # 'ru': ru.version_detail, - # 'py': py_version_detail}}) + self._metadata = {'radical_stack': + {'rp': rp_version_detail, + 'rs': rs.version_detail, + 'ru': ru.version_detail}} + # 'py': py_version_detail}} pwd = self._cfg.path @@ -387,9 +405,9 @@ def as_dict(self): object_dict = { "uid" : self._uid, - "created" : self.created, - "connected" : self.connected, - "closed" : self.closed, + # "created" : self.created, + # "connected" : self.connected, + # "closed" : self.closed, "service_url": str(self.service_url), "cfg" : copy.deepcopy(self._cfg) } diff --git a/src/radical/pilot/task_manager.py b/src/radical/pilot/task_manager.py index 773760eb1e..33f0310a06 100644 --- a/src/radical/pilot/task_manager.py +++ b/src/radical/pilot/task_manager.py @@ -111,6 +111,7 @@ def __init__(self, session, cfg='default', scheduler=None): self._tcb_lock = mt.RLock() self._terminate = mt.Event() self._closed = False + self._task_info = list() for m in rpc.TMGR_METRICS: self._callbacks[m] = dict() @@ -228,6 +229,21 @@ def close(self): self._closed = True self._rep.ok('>>ok\n') + # dump json + json = self.as_dict() + # json['_id'] = self.uid + json['type'] = 'tmgr' + json['uid'] = self.uid + + tgt = '%s/%s.json' % (self._session.path, self.uid) + ru.write_json(json, tgt) + + # dump task json + json = self._task_info + + tgt = '%s/tasks.%s.json' % (self._session.path, self.uid) + ru.write_json(json, tgt) + # -------------------------------------------------------------------------- # @@ -417,6 +433,9 @@ def _update_tasks(self, task_dicts): self._tasks[uid]._update(task_dict) to_notify.append([task, s]) + if task_dict['state'] in rps.FINAL: + self._task_info.append(task_dict) + if to_notify: if _USE_BULK_CB: self._bulk_cbs(set([task for task,_ in to_notify])) diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index 4418f71437..33ea3b3f9f 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -577,7 +577,7 @@ def _cancel_monitor_cb(self, topic, msg): # currently have no abstract 'cancel' command, but instead use # 'cancel_tasks'. 
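Messages on this control pubsub are plain `{'cmd': ..., 'arg': ...}` dicts. A task cancellation as forwarded by the agent would look roughly as follows; the `uids` payload key is an assumption, mirroring the `cancel_pilots` handler seen earlier:

    # hypothetical cancellation message on the control pubsub
    msg = {'cmd': 'cancel_tasks',
           'arg': {'uids': ['task.000000', 'task.000001']}}   # assumed layout

    self.publish(rpc.CONTROL_PUBSUB, msg)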
- self._log.debug('command incoming: %s', msg) + # self._log.debug('command incoming: %s', msg) cmd = msg['cmd'] arg = msg['arg'] @@ -598,8 +598,8 @@ def _cancel_monitor_cb(self, topic, msg): self._log.info('got termination command') self.stop() - else: - self._log.debug('command ignored: %s', cmd) + # else: + # self._log.debug('command ignored: %s', cmd) return True @@ -731,7 +731,7 @@ def register_input(self, states, queue, cb=None, qname=None): 'qname' : qname, 'states' : states} - self._log.debug('registered input %s', name) + self._log.debug('registered input %s [%s] [%s]', name, queue, qname) # we want exactly one worker associated with a state -- but a worker # can be responsible for multiple states @@ -1072,6 +1072,9 @@ def work_cb(self): time.sleep(0.1) return True + # TODO: should a poller over all inputs, or better yet register + # a callback + for name in self._inputs: queue = self._inputs[name]['queue'] @@ -1086,9 +1089,10 @@ def work_cb(self): # qname, len(things)) if not things: + # next input + continue - # return to have a chance to catch term signals - return True + # self._log.debug('work_cb ===== : %d', len(things)) # the worker target depends on the state of things, so we # need to sort the things into buckets by state before @@ -1279,8 +1283,8 @@ def advance(self, things, state=None, publish=True, push=False, qname=None, # empty output -- drop thing # for thing in _things: # self._log.debug('=== drop %s [%s]', thing['uid'], _state) - # # self._prof.prof('drop', uid=thing['uid'], state=_state, - # # ts=ts) + # self._prof.prof('drop', uid=thing['uid'], state=_state, + # ts=ts) continue output = self._outputs[_state] From f63d423935ad207100520b7c10fa101c928673b9 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Tue, 8 Mar 2022 17:52:12 +0100 Subject: [PATCH 020/171] proxy on demand --- bin/radical-pilot-create-static-ve | 2 +- bin/radical-pilot-proxy-server | 336 +----------------- examples/00_getting_started.py | 2 +- setup.py | 2 +- src/radical/pilot/__init__.py | 1 + src/radical/pilot/agent/bootstrap_0.sh | 4 +- .../pilot/configs/session_default.json | 1 - src/radical/pilot/session.py | 51 ++- src/radical/pilot/task_manager.py | 4 +- 9 files changed, 60 insertions(+), 343 deletions(-) diff --git a/bin/radical-pilot-create-static-ve b/bin/radical-pilot-create-static-ve index 14b601384b..4a7bee2f2a 100755 --- a/bin/radical-pilot-create-static-ve +++ b/bin/radical-pilot-create-static-ve @@ -51,7 +51,7 @@ if ! test -z "$DEFAULTS" then # by default, install all RCT dependencies MODULES="$MODULES apache-libcloud chardet colorama idna msgpack" - MODULES="$MODULES msgpack-python netifaces ntplib parse pymongo<4" + MODULES="$MODULES msgpack-python netifaces ntplib parse dill" MODULES="$MODULES pyzmq regex requests setproctitle urllib3" fi diff --git a/bin/radical-pilot-proxy-server b/bin/radical-pilot-proxy-server index 1e46ede4ed..3f3a7a949a 100755 --- a/bin/radical-pilot-proxy-server +++ b/bin/radical-pilot-proxy-server @@ -1,340 +1,21 @@ #!/usr/bin/env python3 -import sys -import time -import queue - -import threading as mt -import multiprocessing as mp -import radical.utils as ru - - -_TIMEOUT = 300 # time to keep the bridge alive -_LINGER_TIMEOUT = 250 # ms to linger after close -_HIGH_WATER_MARK = 0 # number of messages to buffer before dropping - # 0: infinite - - -# ------------------------------------------------------------------------------ -# This ZMQ bridge links clients and agents, and bridges network gaps. 
As such -# it needs to run on a resource which has a public IP address that can be -# reached from both the client and the server machine. -# -# The bridge listens on a `REP` socket (`bridge_request`) for incoming client or -# agent connections, identified by a common session ID. A client connection -# will trigger the creation of the following communication channels: -# -# - proxy_control_pubsub_bridge -# links client and agent control pubsubs (includes heartbeat) -# - proxy_state_pubsub_bridge -# forwards task state updates from agents to client -# - proxy_task_queue -# forwards tasks from the client to the agents and vice versa -# -# -# The protocol on the `bridge_request` channel is as follows: -# -# client_register -# --------------- -# -# request: -# 'cmd': 'client_register' -# 'arg': 'sid': -# -# reply: -# 'res': {'proxy_control_pubsub': {'sub': , 'pub': }, -# 'proxy_state_pubsub' : {'sub': , 'pub': }, -# 'proxy_task_queue' : {'put': , 'get': }} -# -# notes: -# - the request will fail if the session ID is known from another -# `client_register` call -# 'err': 'sid already connected' -# - this request should otherwise always succeed -# - the created pubsub channels will be terminated if the control channel -# has not seen a client heartbeat for <10 * heartbeat_interval> seconds -# - see semantics of the 'client_unregister' request for details. -# - the same termination semantics holds for the 'client_unregister' -# request. -# - any task queues which exist for that session at the time of -# termination will also be closed, disregarding any data held in those -# queues. -# -# -# client_lookup -# --------------- -# -# request: -# 'cmd': 'client_lookup' -# 'arg': 'sid': -# -# reply: -# 'res': {'proxy_control_pubsub': {'sub': , 'pub': }, -# 'proxy_state_pubsub' : {'sub': , 'pub': }, -# 'proxy_task_queue' : {'put': , 'get': }} -# -# notes: -# - the request will fail if the session ID is not registered (anymore) -# - this request should otherwise always succeed -# - the call returns the same information as `client_register`, but does -# not alter the state of the client's bridge in any other way. -# - the request does not count as a heartbeat -# -# -# client_unregister -# ----------------- -# -# request: -# 'cmd': 'client_unregister' -# 'arg': 'sid': -# -# reply: -# 'res': 'ok' -# -# - this method only fails when the session is not connected, with -# 'err': 'session not connected' -# - in all other cases, the request will cause the immediate termination of -# all ZMQ bridges (pubsubs and queues) previously created for that -# session, disregarding of their state, and disposing all undelivered -# messages still held in the bridges. -# -# -# client_heartbeat -# ---------------- -# -# request: -# 'cmd': 'client_heartbeat' -# 'arg': 'sid': -# -# reply: -# 'res': {'time': } -# -# notes: -# - this request will fail if the session is either not connected or timed -# because of an earlier heartbeat failure: -# 'err': 'session not connected' -# - it will otherwise ensure the server that the client is still alive and -# requires the bridge to be up. If the server does not receive a heartbeat -# for longer than TIMEOUT seconds, the bridge will be terminated. 
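A client-side keep-alive for the heartbeat protocol documented above could look as follows; this is a sketch assuming the `ru.zmq.Client` request API used elsewhere in this code base, with an interval chosen to stay well below the server-side `_TIMEOUT`:

    import time
    import radical.utils as ru

    def keep_alive(url, sid, interval=60):
        # register once, then heartbeat until the session ends
        service = ru.zmq.Client(url=url)
        service.request('client_register', {'sid': sid})
        try:
            while True:
                service.request('client_heartbeat', {'sid': sid})
                time.sleep(interval)      # stay well below _TIMEOUT
        finally:
            service.request('client_unregister', {'sid': sid})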
-# -# -# default error mode -# ------------------ -# -# To any request other than the above, the ZMQ bridge will respond: -# 'err': 'invalid request' -# -# ------------------------------------------------------------------------------ - -# ------------------------------------------------------------------------------ -# -class ZMQBridge(ru.zmq.Server): - - def __init__(self): - - self._lock = mt.Lock() - self._clients = dict() - - ru.zmq.Server.__init__(self, url='tcp://*:10000+') - - self._monitor = mt.Thread(target=self._monitor) - self._monitor.daemon = True - self._monitor.start() - - self.register_request('client_register', self._client_register) - self.register_request('client_lookup', self._client_lookup) - self.register_request('client_unregister', self._client_unregister) - self.register_request('client_heartbeat', self._client_heartbeat) - - - # -------------------------------------------------------------------------- - # - def _monitor(self): - - # this is a daemon thread - it never exits until process termination - while True: - - time.sleep(10) - now = time.time() - - # iterate w/o lock, and thus get a snapshot of the known sids - sids = list(self._clients.keys()) - - to_terminate = list() - for sid in sids: - - client = self._clients.get(sid) - if not client: - continue +__copyright__ = "Copyright 2013-2022, http://radical.rutgers.edu" +__license__ = "MIT" - if now > (client['hb'] + _TIMEOUT): - self._log.warn('client %s timed out' % sid) - to_terminate.append(sid) - if not to_terminate: - continue - - with self._lock: - - for sid in to_terminate: - - client = self._clients.get(sid) - if not client: - continue - - client['term'].set() - client['proc'].join() - del(self._clients[sid]) - - - # -------------------------------------------------------------------------- - # - def stop(self): - - for sid in self._clients: - self._clients[sid]['term'].set() - - ru.zmq.Server.stop(self) - - - # -------------------------------------------------------------------------- - # - def _client_register(self, arg): - - sid = arg['sid'] - - if sid in self._clients: - raise RuntimeError('client already registered') - - q = mp.Queue() - term = mp.Event() - proc = mp.Process(target=self._worker, args=[sid, q, term]) - proc.start() - - try: - data = q.get(timeout=10) - except queue.Empty: - proc.terminate() - raise RuntimeError('worker startup failed') - - self._clients[sid] = {'proc': proc, - 'term': term, - 'data': data, - 'hb' : time.time()} - - return self._clients[sid]['data'] - - - # -------------------------------------------------------------------------- - # - def _worker(self, sid, q, term): - - log = ru.Logger('radical.pilot.bridge', level='debug', path=sid) - - proxy_cp = None - proxy_sp = None - proxy_aq = None - - try: - proxy_cp = ru.zmq.PubSub(cfg={'channel': 'proxy_control_pubsub', - 'uid' : 'proxy_control_pubsub', - 'type' : 'pubsub', - 'log_lvl': 'debug', - 'path' : sid}) - - proxy_sp = ru.zmq.PubSub(cfg={'channel': 'proxy_state_pubsub', - 'uid' : 'proxy_state_pubsub', - 'type' : 'pubsub', - 'log_lvl': 'debug', - 'path' : sid}) - - proxy_aq = ru.zmq.Queue (cfg={'channel': 'proxy_task_queue', - 'uid' : 'proxy_task_queue', - 'type' : 'queue', - 'log_lvl': 'debug', - 'path' : sid}) - - proxy_cp.start() - proxy_sp.start() - proxy_aq.start() - - data = {'proxy_control_pubsub': {'pub': str(proxy_cp.addr_pub), - 'sub': str(proxy_cp.addr_sub)}, - 'proxy_state_pubsub' : {'pub': str(proxy_sp.addr_pub), - 'sub': str(proxy_sp.addr_sub)}, - 'proxy_task_queue' : {'put': 
str(proxy_aq.addr_put), - 'get': str(proxy_aq.addr_get)}} - - # inform service about endpoint details - q.put(data) - - # we run forever until we receive a termination command - log.info('work') - term.wait() - - - except: - log.exception('worker failed') - - finally: - - if proxy_cp: proxy_cp.stop() - if proxy_sp: proxy_sp.stop() - if proxy_aq: proxy_aq.stop() - - log.info('terminated') - - - # -------------------------------------------------------------------------- - # - def _client_lookup(self, arg): - - sid = arg['sid'] - - with self._lock: - if sid not in self._clients: - raise RuntimeError('client %s not registered' % sid) - - return self._clients[sid]['data'] - - - # -------------------------------------------------------------------------- - # - def _client_unregister(self, arg): - - sid = arg['sid'] - - with self._lock: - - if sid not in self._clients: - raise RuntimeError('client %s not registered' % sid) - - self._clients[sid]['term'].set() - self._clients[sid]['proc'].join() - - del(self._clients[sid]) - - - # -------------------------------------------------------------------------- - # - def _client_heartbeat(self, arg): - - sid = arg['sid'] - now = time.time() - - with self._lock: - - if sid not in self._clients: - raise RuntimeError('client %s not registered' % sid) +import sys +import time - self._clients[sid]['hb'] = now +import radical.utils as ru +import radical.pilot as rp # ------------------------------------------------------------------------------ # if __name__ == '__main__': - bridge = ZMQBridge() + bridge = rp.Proxy() try: sys.stdout.write('uid : %s\n' % bridge.uid) @@ -350,11 +31,8 @@ if __name__ == '__main__': time.sleep(1) finally: - print('final') bridge.stop() - print('stopped') bridge.wait() - print('waited') # ------------------------------------------------------------------------------ diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index d95cb99669..726b845852 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -67,7 +67,7 @@ pilot = pmgr.submit_pilots(pdesc) - n = 1 # 024 * 1024 # number of tasks to run + n = 1024 * 1024 # number of tasks to run report.header('submit %d tasks' % n) # Register the pilot in a TaskManager object. 
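For reference, the request protocol documented in the bridge script above can
be exercised with a plain ZMQ client - a minimal sketch, in which the proxy
address and session ID are made up and error handling is omitted:

    import radical.utils as ru

    # connect to a running proxy server instance
    client = ru.zmq.Client(url='tcp://127.0.0.1:10000')

    # register a session and obtain the bridge endpoint addresses
    reply = client.request('client_register', {'sid': 'rp.session.0000'})
    print(reply['proxy_task_queue']['put'])

    # keep the session's bridges alive ...
    client.request('client_heartbeat', {'sid': 'rp.session.0000'})

    # ... and tear them down when the session ends
    client.request('client_unregister', {'sid': 'rp.session.0000'})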
diff --git a/setup.py b/setup.py index a113a9a32b..e0ce605123 100755 --- a/setup.py +++ b/setup.py @@ -250,7 +250,7 @@ def run(self): # 'setup_requires' : ['pytest-runner'], 'install_requires' : ['radical.utils>=1.12', 'radical.saga>=1.12', - 'pymongo<4', + 'dill', 'setproctitle' ], 'extras_require' : {'autopilot' : ['github3.py']}, diff --git a/src/radical/pilot/__init__.py b/src/radical/pilot/__init__.py index e022972951..77be24429e 100644 --- a/src/radical/pilot/__init__.py +++ b/src/radical/pilot/__init__.py @@ -18,6 +18,7 @@ # import API from .session import Session from .context import Context +from .proxy import Proxy from .task_manager import TaskManager from .task import Task diff --git a/src/radical/pilot/agent/bootstrap_0.sh b/src/radical/pilot/agent/bootstrap_0.sh index c552043712..de62e43c08 100755 --- a/src/radical/pilot/agent/bootstrap_0.sh +++ b/src/radical/pilot/agent/bootstrap_0.sh @@ -128,10 +128,10 @@ VIRTENV_TGZ="$VIRTENV_VER.tar.gz" VIRTENV_TGZ_URL="https://files.pythonhosted.org/packages/66/f0/6867af06d2e2f511e4e1d7094ff663acdebc4f15d4a0cb0fed1007395124/$VIRTENV_TGZ" VIRTENV_IS_ACTIVATED=FALSE -VIRTENV_RADICAL_DEPS="pymongo<4 colorama ntplib "\ +VIRTENV_RADICAL_DEPS="dill colorama ntplib "\ "pyzmq netifaces setproctitle msgpack regex" -VIRTENV_RADICAL_MODS="pymongo colorama ntplib "\ +VIRTENV_RADICAL_MODS="dill colorama ntplib "\ "zmq netifaces setproctitle msgpack regex" if ! test -z "$RADICAL_DEBUG" diff --git a/src/radical/pilot/configs/session_default.json b/src/radical/pilot/configs/session_default.json index ab60efdafa..c1fff96794 100644 --- a/src/radical/pilot/configs/session_default.json +++ b/src/radical/pilot/configs/session_default.json @@ -5,7 +5,6 @@ { "service_url" : "${RADICAL_PILOT_SERVICE_URL}", "session_base" : "${RADICAL_PILOT_SESSION_BASE:$PWD}", - "record" : "${RADICAL_PILOT_SESSION_RECORD}", "heartbeat" : { "interval" : 1.0, diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 963bcc14b1..66bdca5529 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -15,8 +15,9 @@ import radical.saga.filesystem as rsfs import radical.saga.utils.pty_shell as rsup -from . import constants as rpc -from . import utils as rpu +from . import constants as rpc +from . import utils as rpu +from .proxy import Proxy # ------------------------------------------------------------------------------ @@ -55,8 +56,9 @@ def __init__(self, service_url=None, uid=None, cfg=None, _primary=True, **Arguments:** * **service_url** (`string`): The Bridge Service URL. If none is given, RP uses the environment variable - RADICAL_PILOT_SERVICE_URL. If that is not set, an error will be - raised. + RADICAL_PILOT_SERVICE_URL. If that is not set, a temporary + service will be started on localhost for the time of the + application's execution. * **cfg** (`str` or `dict`): a named or instantiated configuration to be used for the session. 
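In application code, the fallback documented above amounts to the following -
a minimal sketch, assuming no service URL is configured anywhere:

    import os
    import radical.pilot as rp

    # no service URL is set ...
    os.environ.pop('RADICAL_PILOT_SERVICE_URL', None)

    # ... so this primary session starts a temporary proxy service on
    # localhost which lives for the duration of the application
    session = rp.Session()

    try:
        pass  # create managers, submit pilots and tasks, etc.
    finally:
        session.close()  # also terminates the embedded service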
@@ -156,9 +158,22 @@ def __init__(self, service_url=None, uid=None, cfg=None, _primary=True, service_url = os.environ.get('RADICAL_PILOT_SERVICE_URL') if not service_url: - # FIXME MongoDB: in this case, start an embedded service - raise RuntimeError("no service url (set RADICAL_PILOT_SERVICE_URL)") + if not _primary: + raise RuntimeError('no proxy service URL?') + else: + # start a temporary embedded service + self._proxy_addr = None + self._proxy_event = mt.Event() + + self._proxy_thread = mt.Thread(target=self._proxy) + self._proxy_thread.daemon = True + self._proxy_thread.start() + + self._proxy_event.wait() + assert(self._proxy_addr) + service_url = self._proxy_addr + os.environ['RADICAL_PILOT_SERVICE_URL'] = service_url self._cfg.service_url = service_url @@ -307,6 +322,30 @@ def close(self, **kwargs): ru.write_json(json, tgt) + # -------------------------------------------------------------------------- + # + def _proxy(self): + + bridge = Proxy() + + try: + bridge.start() + + self._proxy_addr = bridge.addr + self._proxy_event.set() + + # run forever until process is interrupted or killed + while True: + time.sleep(1) + + finally: + bridge.stop() + bridge.wait() + + +# ------------------------------------------------------------------------------ + + # -------------------------------------------------------------------------- # def _connect_proxy(self): diff --git a/src/radical/pilot/task_manager.py b/src/radical/pilot/task_manager.py index 64b5d752a1..d180c7563a 100644 --- a/src/radical/pilot/task_manager.py +++ b/src/radical/pilot/task_manager.py @@ -753,8 +753,6 @@ def submit_tasks(self, descriptions): ret += tasks tasks = list() - self._rep.progress_done() - # submit remaining bulk (if any) if tasks: task_docs = [u.as_dict() for u in tasks] @@ -762,6 +760,8 @@ def submit_tasks(self, descriptions): publish=True, push=True) ret += tasks + self._rep.progress_done() + if ret_list: return ret else : return ret[0] From 644501e0865cca03446fc190fb4e44ff0624d503 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 21 Mar 2022 11:27:07 +0100 Subject: [PATCH 021/171] docs --- .../pilot/configs/session_default.json | 3 +- src/radical/pilot/session.py | 131 ++++++++++++------ 2 files changed, 91 insertions(+), 43 deletions(-) diff --git a/src/radical/pilot/configs/session_default.json b/src/radical/pilot/configs/session_default.json index c1fff96794..8eeb70daa1 100644 --- a/src/radical/pilot/configs/session_default.json +++ b/src/radical/pilot/configs/session_default.json @@ -3,7 +3,8 @@ # specified. It contains the minimal set of settings required for # a functional rp session, both on the client and on the agent side. { - "service_url" : "${RADICAL_PILOT_SERVICE_URL}", + "proxy_url" : "${RADICAL_PILOT_PROXY_URL}", + "proxy_host" : "${RADICAL_PILOT_PROXY_HOST:localhost}", "session_base" : "${RADICAL_PILOT_SESSION_BASE:$PWD}", "heartbeat" : { diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 66bdca5529..76625d27e5 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -47,40 +47,75 @@ class Session(rs.Session): # -------------------------------------------------------------------------- # - def __init__(self, service_url=None, uid=None, cfg=None, _primary=True, + def __init__(self, proxy_host=None, proxy_url=None, uid=None, + cfg=None, _primary=True, **close_options): ''' Creates a new session. A new Session instance is created and stored in the database. 
+        Any RP Session will require an RP Proxy to facilitate communication
+        between the client machine (i.e., the host where the application created
+        the Session instance) and the target resource (i.e., the host where the
+        pilot agent/s is/are running and where the workload is being executed).
+
+        A `proxy_url` can be specified which then must point to an RP Proxy
+        Service instance which this session can use to establish a communication
+        proxy.  Alternatively, a `proxy_host` can be specified - the session will
+        then attempt to start a proxy on that host.  If neither `proxy_url` nor
+        `proxy_host` are specified, the session will check for the environment
+        variables `RADICAL_PILOT_PROXY_URL` and `RADICAL_PILOT_PROXY_HOST` (in
+        that order) and will interpret them as above.  If none of this
+        information is available, the session will instantiate a proxy on the
+        local host.  Note that any proxy instantiated by the session itself will
+        be terminated once the session instance is closed or goes out of scope
+        and is thus garbage collected.  A proxy pointed to by `proxy_url` or by
+        `RADICAL_PILOT_PROXY_URL` will be reusable by other sessions and will
+        not terminate on the session's demise.
+
+        Note: an RP proxy will have to be accessible by both the client and the
+              target hosts to facilitate communication between both parties.
+              That implies access to the respective ports.  Proxies started by
+              the session itself will use the first port larger than 10000
+              which is found to be free.
+
         **Arguments:**
-            * **service_url** (`string`): The Bridge Service URL.
-              If none is given, RP uses the environment variable
-              RADICAL_PILOT_SERVICE_URL. If that is not set, a temporary
-              service will be started on localhost for the time of the
-              application's execution.
+            * **proxy_url** (`string`): proxy service URL - points to an RP
+              proxy service which is used to establish an RP communication proxy
+              for this session.

-            * **cfg** (`str` or `dict`): a named or instantiated configuration
-              to be used for the session.
+            * **proxy_host** (`string`): proxy host - alternative to the
+              `proxy_url`, the application can specify a host name on which
+              a temporary proxy is started by the session.  This defaults to
+              `localhost` (but see remarks above about the interpretation of
+              environment variables).

             * **uid** (`string`): Create a session with this UID.  Session UIDs
               MUST be unique - otherwise they will lead to conflicts in the
               underlying database, resulting in undefined behaviours (or worse).

+            * **cfg** (`str` or `dict`): a named or instantiated configuration
+              to be used for the session.
+
             * **_primary** (`bool`): only sessions created by the original
-              application process (via `rp.Session()`, will create comm bridges
+              application process (via `rp.Session()`) will create proxies.
               Secondary session instances are instantiated internally in
               processes spawned (directly or indirectly) by the initial session,
               for example in some of its components.  A secondary session will
               inherit the original session ID, but will not attempt to create
-              a new comm bridge - if such a bridge connection is needed, the
-              component will connect to the one created by the primary session.
+              a new proxy - if a proxy is needed, the component will connect to
+              the one created by the primary session.

         If additional keyword arguments are provided, they will be used as the
         default arguments to Session.close().
(This can be useful when the Session is used as a Python context manager, such that close() is called automatically at the end of a ``with`` block.) ''' + + # TODO: document valid config options (or remove this option) + # * **cfg** (`str` or `dict`): a named or instantiated configuration + # to be used for the session. + self._close_options = _CloseOptions(close_options) # NOTE: `name` and `cfg` are overloaded, the user cannot point to # a predefined config and amend it at the same time. This might @@ -90,7 +125,7 @@ def __init__(self, service_url=None, uid=None, cfg=None, _primary=True, name = cfg cfg = None - self._service = None + self._proxy = None self._closed = False self._primary = _primary self._t_start = time.time() @@ -150,32 +185,44 @@ def __init__(self, service_url=None, uid=None, cfg=None, _primary=True, self._rep.info ('< Date: Mon, 4 Apr 2022 18:26:48 +0200 Subject: [PATCH 022/171] add missing files --- src/radical/pilot/proxy.py | 343 +++++++++++++++++++++++++++++++++++++ 1 file changed, 343 insertions(+) create mode 100644 src/radical/pilot/proxy.py diff --git a/src/radical/pilot/proxy.py b/src/radical/pilot/proxy.py new file mode 100644 index 0000000000..2dc8488e53 --- /dev/null +++ b/src/radical/pilot/proxy.py @@ -0,0 +1,343 @@ + +import sys +import time +import queue + +import threading as mt +import multiprocessing as mp +import radical.utils as ru + + +_TIMEOUT = 300 # time to keep the bridge alive +_LINGER_TIMEOUT = 250 # ms to linger after close +_HIGH_WATER_MARK = 0 # number of messages to buffer before dropping + # 0: infinite + + +# ------------------------------------------------------------------------------ +# This ZMQ bridge links clients and agents, and bridges network gaps. As such +# it needs to run on a resource which has a public IP address that can be +# reached from both the client and the server machine. +# +# The bridge listens on a `REP` socket (`bridge_request`) for incoming client or +# agent connections, identified by a common session ID. A client connection +# will trigger the creation of the following communication channels: +# +# - proxy_control_pubsub_bridge +# links client and agent control pubsubs (includes heartbeat) +# - proxy_state_pubsub_bridge +# forwards task state updates from agents to client +# - proxy_task_queue +# forwards tasks from the client to the agents and vice versa +# +# +# The protocol on the `bridge_request` channel is as follows: +# +# client_register +# --------------- +# +# request: +# 'cmd': 'client_register' +# 'arg': 'sid': +# +# reply: +# 'res': {'proxy_control_pubsub': {'sub': , 'pub': }, +# 'proxy_state_pubsub' : {'sub': , 'pub': }, +# 'proxy_task_queue' : {'put': , 'get': }} +# +# notes: +# - the request will fail if the session ID is known from another +# `client_register` call +# 'err': 'sid already connected' +# - this request should otherwise always succeed +# - the created pubsub channels will be terminated if the control channel +# has not seen a client heartbeat for <10 * heartbeat_interval> seconds +# - see semantics of the 'client_unregister' request for details. +# - the same termination semantics holds for the 'client_unregister' +# request. +# - any task queues which exist for that session at the time of +# termination will also be closed, disregarding any data held in those +# queues. 
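+#
+# example:
+#
+#   an illustrative `client_register` exchange - the values below are made up
+#   and only sketch the message layout described above:
+#
+#     request: {'cmd': 'client_register',
+#               'arg': {'sid': 'rp.session.0000'}}
+#
+#     reply  : {'res': {'proxy_control_pubsub': {'sub': 'tcp://127.0.0.1:10001',
+#                                                'pub': 'tcp://127.0.0.1:10002'},
+#                       'proxy_state_pubsub'  : {'sub': 'tcp://127.0.0.1:10003',
+#                                                'pub': 'tcp://127.0.0.1:10004'},
+#                       'proxy_task_queue'    : {'put': 'tcp://127.0.0.1:10005',
+#                                                'get': 'tcp://127.0.0.1:10006'}}}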
+# +# +# client_lookup +# --------------- +# +# request: +# 'cmd': 'client_lookup' +# 'arg': 'sid': +# +# reply: +# 'res': {'proxy_control_pubsub': {'sub': , 'pub': }, +# 'proxy_state_pubsub' : {'sub': , 'pub': }, +# 'proxy_task_queue' : {'put': , 'get': }} +# +# notes: +# - the request will fail if the session ID is not registered (anymore) +# - this request should otherwise always succeed +# - the call returns the same information as `client_register`, but does +# not alter the state of the client's bridge in any other way. +# - the request does not count as a heartbeat +# +# +# client_unregister +# ----------------- +# +# request: +# 'cmd': 'client_unregister' +# 'arg': 'sid': +# +# reply: +# 'res': 'ok' +# +# - this method only fails when the session is not connected, with +# 'err': 'session not connected' +# - in all other cases, the request will cause the immediate termination of +# all ZMQ bridges (pubsubs and queues) previously created for that +# session, disregarding of their state, and disposing all undelivered +# messages still held in the bridges. +# +# +# client_heartbeat +# ---------------- +# +# request: +# 'cmd': 'client_heartbeat' +# 'arg': 'sid': +# +# reply: +# 'res': {'time': } +# +# notes: +# - this request will fail if the session is either not connected or timed +# because of an earlier heartbeat failure: +# 'err': 'session not connected' +# - it will otherwise ensure the server that the client is still alive and +# requires the bridge to be up. If the server does not receive a heartbeat +# for longer than TIMEOUT seconds, the bridge will be terminated. +# +# +# default error mode +# ------------------ +# +# To any request other than the above, the ZMQ bridge will respond: +# 'err': 'invalid request' +# +# ------------------------------------------------------------------------------ + +# ------------------------------------------------------------------------------ +# +class Proxy(ru.zmq.Server): + + def __init__(self): + + self._lock = mt.Lock() + self._clients = dict() + + ru.zmq.Server.__init__(self, url='tcp://*:10000+') + + self._monitor_thread = mt.Thread(target=self._monitor) + self._monitor_thread.daemon = True + self._monitor_thread.start() + + self.register_request('client_register', self._client_register) + self.register_request('client_lookup', self._client_lookup) + self.register_request('client_unregister', self._client_unregister) + self.register_request('client_heartbeat', self._client_heartbeat) + self.register_request('service_stop', self._service_stop) + + + # -------------------------------------------------------------------------- + # + def _monitor(self): + + # this is a daemon thread - it never exits until process termination + while True: + + time.sleep(10) + now = time.time() + + # iterate w/o lock, and thus get a snapshot of the known sids + sids = list(self._clients.keys()) + + to_terminate = list() + for sid in sids: + + client = self._clients.get(sid) + if not client: + continue + + if now > (client['hb'] + _TIMEOUT): + self._log.warn('client %s timed out' % sid) + to_terminate.append(sid) + + if not to_terminate: + continue + + with self._lock: + + for sid in to_terminate: + + client = self._clients.get(sid) + if not client: + continue + + client['term'].set() + client['proc'].join() + del(self._clients[sid]) + + + # -------------------------------------------------------------------------- + # + def stop(self): + + for sid in self._clients: + self._log.info('stop client %s' % sid) + self._clients[sid]['term'].set() + + 
self._log.info('stop proxy service') + ru.zmq.Server.stop(self) + + + # -------------------------------------------------------------------------- + # + def _client_register(self, arg): + + sid = arg['sid'] + + if sid in self._clients: + raise RuntimeError('client already registered') + + q = mp.Queue() + term = mp.Event() + proc = mp.Process(target=self._worker, args=(sid, q, term)) + proc.start() + + try: + data = q.get(timeout=10) + except queue.Empty: + proc.terminate() + raise RuntimeError('worker startup failed') + + self._clients[sid] = {'proc': proc, + 'term': term, + 'data': data, + 'hb' : time.time()} + + return self._clients[sid]['data'] + + + # -------------------------------------------------------------------------- + # + def _worker(self, sid, q, term): + + log = ru.Logger('radical.pilot.bridge', level='debug', path=sid) + + proxy_cp = None + proxy_sp = None + proxy_aq = None + + try: + proxy_cp = ru.zmq.PubSub(cfg={'channel': 'proxy_control_pubsub', + 'uid' : 'proxy_control_pubsub', + 'type' : 'pubsub', + 'log_lvl': 'debug', + 'path' : sid}) + + proxy_sp = ru.zmq.PubSub(cfg={'channel': 'proxy_state_pubsub', + 'uid' : 'proxy_state_pubsub', + 'type' : 'pubsub', + 'log_lvl': 'debug', + 'path' : sid}) + + proxy_aq = ru.zmq.Queue (cfg={'channel': 'proxy_task_queue', + 'uid' : 'proxy_task_queue', + 'type' : 'queue', + 'log_lvl': 'debug', + 'path' : sid}) + + proxy_cp.start() + proxy_sp.start() + proxy_aq.start() + + data = {'proxy_control_pubsub': {'pub': str(proxy_cp.addr_pub), + 'sub': str(proxy_cp.addr_sub)}, + 'proxy_state_pubsub' : {'pub': str(proxy_sp.addr_pub), + 'sub': str(proxy_sp.addr_sub)}, + 'proxy_task_queue' : {'put': str(proxy_aq.addr_put), + 'get': str(proxy_aq.addr_get)}} + + # inform service about endpoint details + q.put(data) + + # we run forever until we receive a termination command + log.info('work') + term.wait() + + + except: + log.exception('worker failed') + + finally: + + if proxy_cp: proxy_cp.stop() + if proxy_sp: proxy_sp.stop() + if proxy_aq: proxy_aq.stop() + + log.info('terminated') + + + # -------------------------------------------------------------------------- + # + def _client_lookup(self, arg): + + sid = arg['sid'] + + with self._lock: + if sid not in self._clients: + raise RuntimeError('client %s not registered' % sid) + + return self._clients[sid]['data'] + + + # -------------------------------------------------------------------------- + # + def _client_unregister(self, arg): + + sid = arg['sid'] + + with self._lock: + + if sid not in self._clients: + raise RuntimeError('client %s not registered' % sid) + + self._clients[sid]['term'].set() + self._clients[sid]['proc'].join() + + del(self._clients[sid]) + + + # -------------------------------------------------------------------------- + # + def _client_heartbeat(self, arg): + + sid = arg['sid'] + now = time.time() + + with self._lock: + + if sid not in self._clients: + raise RuntimeError('client %s not registered' % sid) + + self._clients[sid]['hb'] = now + + + # -------------------------------------------------------------------------- + # + def _service_stop(self): + + self.stop() + + +# ------------------------------------------------------------------------------ + From 343f82675fce1ef5ac0cee23471e271848649018 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Fri, 15 Apr 2022 12:53:23 +0200 Subject: [PATCH 023/171] fix var name --- src/radical/pilot/utils/component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/radical/pilot/utils/component.py 
b/src/radical/pilot/utils/component.py index 53f7d2dda1..338b22e0a9 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -1086,7 +1086,7 @@ def work_cb(self): # FIXME: a simple, 1-thing caching mechanism would likely # remove the req/res overhead completely (for any # non-trivial worker). - things = queue.get_nowait(qname=qname, timeout=200) # microseconds + things = queue.get_nowait(qname=name, timeout=200) # microseconds # self._log.debug('work_cb %s: %s %s %d', name, queue.channel, # qname, len(things)) things = ru.as_list(things) From 68b7da3f8badf3483835d39d850dbe6748cb0342 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Fri, 15 Apr 2022 20:39:13 +0200 Subject: [PATCH 024/171] snapshot --- src/radical/pilot/pmgr/launching/base.py | 29 +++++++++--------------- src/radical/pilot/session.py | 23 ++++++++----------- src/radical/pilot/utils/component.py | 7 ++++-- 3 files changed, 25 insertions(+), 34 deletions(-) diff --git a/src/radical/pilot/pmgr/launching/base.py b/src/radical/pilot/pmgr/launching/base.py index eae1de8afb..a27031a139 100644 --- a/src/radical/pilot/pmgr/launching/base.py +++ b/src/radical/pilot/pmgr/launching/base.py @@ -299,13 +299,6 @@ def work(self, pilots): self._start_pilot_bulk(resource, schema, pilots) - # Update the Pilots' state to 'PMGR_ACTIVE_PENDING' if job - # submission was successful. Since the pilot leaves the - # scope of the PMGR for the time being, we update the - # complete DB document - for pilot in pilots: - pilot['$all'] = True - self.advance(pilots, rps.PMGR_ACTIVE_PENDING, push=False, publish=True) @@ -460,6 +453,7 @@ def _start_pilot_bulk(self, resource, schema, pilots): cmd = 'ln -s %s %s/%s' % (os.path.abspath(src), tmp_dir, tgt) out, err, ret = ru.sh_callout(cmd, shell=True) if ret: + self._log.debug('cmd: %s', cmd) self._log.debug('out: %s', out) self._log.debug('err: %s', err) raise RuntimeError('callout failed: %s' % cmd) @@ -470,6 +464,7 @@ def _start_pilot_bulk(self, resource, schema, pilots): out, err, ret = ru.sh_callout(cmd, shell=True) if ret: + self._log.debug('cmd: %s', cmd) self._log.debug('out: %s', out) self._log.debug('err: %s', err) raise RuntimeError('callout failed: %s' % cmd) @@ -534,8 +529,8 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): # ---------------------------------------------------------------------- # Database connection parameters - sid = self._session.uid - database_url = self._session.cfg.dburl + sid = self._session.uid + proxy_url = self._session.cfg.proxy_url # some default values are determined at runtime default_virtenv = '%%(resource_sandbox)s/ve.%s.%s' % \ @@ -558,7 +553,7 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): # ---------------------------------------------------------------------- # get parameters from resource cfg, set defaults where needed - agent_dburl = rcfg.get('agent_mongodb_endpoint', database_url) + agent_proxy_url = rcfg.get('agent_proxy_url', proxy_url) agent_spawner = rcfg.get('agent_spawner', DEFAULT_AGENT_SPAWNER) agent_config = rcfg.get('agent_config', DEFAULT_AGENT_CONFIG) agent_scheduler = rcfg.get('agent_scheduler') @@ -647,11 +642,11 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): raise RuntimeError("'global_virtenv' is deprecated (%s)" % resource) # Create a host:port string for use by the bootstrap_0. 
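        # (note: where the removed code below fell back to MongoDB's default
        #  port 27017, the proxy URL is now required to carry an explicit port)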
- db_url = ru.Url(agent_dburl) - if db_url.port: - db_hostport = "%s:%d" % (db_url.host, db_url.port) + tmp = ru.Url(agent_proxy_url) + if tmp.port: + hostport = "%s:%d" % (tmp.host, tmp.port) else: - db_hostport = "%s:%d" % (db_url.host, 27017) # mongodb default + raise RuntimeError('service URL needs port number: %s' % tmp) # ---------------------------------------------------------------------- # the version of the agent is derived from @@ -834,7 +829,7 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): # set optional args if resource_manager == "CCM": bs_args.extend(['-c']) if forward_tunnel_endpoint: bs_args.extend(['-f', forward_tunnel_endpoint]) - if forward_tunnel_endpoint: bs_args.extend(['-h', db_hostport]) + if forward_tunnel_endpoint: bs_args.extend(['-h', hostport]) if python_interpreter: bs_args.extend(['-i', python_interpreter]) if tunnel_bind_device: bs_args.extend(['-t', tunnel_bind_device]) if cleanup: bs_args.extend(['-x', cleanup]) @@ -852,7 +847,7 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): agent_cfg['scheduler'] = agent_scheduler agent_cfg['runtime'] = runtime agent_cfg['app_comm'] = app_comm - agent_cfg['dburl'] = str(database_url) + agent_cfg['proxy_url'] = agent_proxy_url agent_cfg['sid'] = sid agent_cfg['pid'] = pid agent_cfg['pmgr'] = self._pmgr @@ -875,11 +870,9 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): agent_cfg['resource_cfg'] = copy.deepcopy(rcfg) agent_cfg['debug'] = self._log.getEffectiveLevel() - # we'll also push the agent config into MongoDB pilot['cfg'] = agent_cfg pilot['resources'] = {'cpu': requested_cores, 'gpu': requested_gpus} - pilot['$set'] = ['resources'] # ---------------------------------------------------------------------- diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index eafbdcd426..5733847f1a 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -219,22 +219,20 @@ def __init__(self, proxy_host=None, proxy_url=None, uid=None, proxy_url = self._proxy_addr os.environ['RADICAL_PILOT_SERVICE_URL'] = proxy_url - self._cfg.proxy_url = proxy_url - if self._primary: - self._connect_proxy() + if self._primary: + self._start_primary() else: # a non-primary session will query the same service url to obtain # information about the comm channels created by the primary session - if not self._cfg.proxy_url: - self._proxy = ru.zmq.Client(url=self._cfg.proxy_url) - response = self._proxy.request('client_lookup', - {'sid': self._uid}) - self._cfg.proxy = response - self._log.debug('=== %s: %s', self._primary, self._cfg.proxy) + self._proxy = ru.zmq.Client(url=self._cfg.proxy_url) + response = self._proxy.request('client_lookup', + {'sid': self._uid}) + self._cfg.proxy = response + self._log.debug('=== %s: %s', self._primary, self._cfg.proxy) # for mostly debug purposes, dump the used session config @@ -388,12 +386,9 @@ def _run_proxy(self): bridge.wait() -# ------------------------------------------------------------------------------ - - # -------------------------------------------------------------------------- # - def _connect_proxy(self): + def _start_primary(self): assert(self._primary) @@ -407,7 +402,7 @@ def _connect_proxy(self): {'sid': self._uid}) self._cfg.proxy = response - self._log.debug('=== %s: %s', self._primary, self._cfg.proxy) + self._log.debug('=== %s', self._cfg.proxy) # now that the proxy bridges have been created on the service host, # write config files for them so that all components can use them diff 
--git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index 338b22e0a9..7f5530f965 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -221,7 +221,9 @@ def start_components(self, cfg=None): and start them ''' - self._prof.prof('start_components_start', uid=self._uid) + self._prof.prof('start_components_start: %s', uid=self._uid) + import pprint + self._log.debug('=== cmgr: %s', pprint.pformat(self._cfg.as_dict())) timeout = self._cfg.heartbeat.timeout @@ -247,11 +249,12 @@ def start_components(self, cfg=None): ccfg.sid = cfg.sid ccfg.base = cfg.base ccfg.path = cfg.path + ccfg.proxy_url = cfg.proxy_url ccfg.heartbeat = cfg.heartbeat ru.dict_merge(ccfg, scfg, policy=ru.PRESERVE, log=self._log) - fname = '%s/%s.json' % (cfg.path, ccfg.uid) + fname = '%s/%s.cfg' % (cfg.path, ccfg.uid) ccfg.write(fname) self._log.info('create component %s [%s]', cname, ccfg.uid) From f6632f55795997731656b40d57115fba835b26a0 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Tue, 26 Jul 2022 11:53:21 +0200 Subject: [PATCH 025/171] resolved botched merge --- src/radical/pilot/utils/session.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/radical/pilot/utils/session.py b/src/radical/pilot/utils/session.py index 42ad730bb3..29af5c082b 100644 --- a/src/radical/pilot/utils/session.py +++ b/src/radical/pilot/utils/session.py @@ -75,18 +75,10 @@ def fetch_profiles (sid, src=None, tgt=None, access=None, if not os.path.isfile(client_profile): raise RuntimeError('profile %s does not exist' % client_profile) -<<<<<<< HEAD # FIXME: MongoDB json_docs = ... return pilots = json_docs['pilot'] -======= - _, db, _, _, _ = ru.mongodb_connect (dburl) - - json_docs = get_session_docs(sid, db) - - pilots = json_docs['pilot'] ->>>>>>> devel num_pilots = len(pilots) log.debug("Session: %s", sid) From f4aca45c960508643645ea97b37ec334ac39f6fa Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Thu, 25 Aug 2022 15:14:51 +0200 Subject: [PATCH 026/171] code consistency, docstrings --- src/radical/pilot/agent/agent_0.py | 2 +- src/radical/pilot/proxy.py | 50 ++-- src/radical/pilot/session.py | 415 ++++++++++++++++------------- 3 files changed, 261 insertions(+), 206 deletions(-) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 901bfe2d73..efa2483d5a 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -63,7 +63,7 @@ def __init__(self, cfg, session): self._prof.prof('hostname', uid=cfg.pid, msg=ru.get_hostname()) # run an inline registry service to share runtime config with other - # agents and components + # agent components reg_uid = 'radical.pilot.reg.%s' % self._uid self._reg_service = ru.zmq.Registry(uid=reg_uid) self._reg_service.start() diff --git a/src/radical/pilot/proxy.py b/src/radical/pilot/proxy.py index 2dc8488e53..58d5b270b2 100644 --- a/src/radical/pilot/proxy.py +++ b/src/radical/pilot/proxy.py @@ -33,11 +33,11 @@ # # The protocol on the `bridge_request` channel is as follows: # -# client_register -# --------------- +# register +# -------- # # request: -# 'cmd': 'client_register' +# 'cmd': 'register' # 'arg': 'sid': # # reply: @@ -47,24 +47,24 @@ # # notes: # - the request will fail if the session ID is known from another -# `client_register` call +# `register` call # 'err': 'sid already connected' # - this request should otherwise always succeed # - the created pubsub channels will be terminated if the control channel # has not seen a client heartbeat for <10 
* heartbeat_interval> seconds -# - see semantics of the 'client_unregister' request for details. -# - the same termination semantics holds for the 'client_unregister' +# - see semantics of the 'unregister' request for details. +# - the same termination semantics holds for the 'unregister' # request. # - any task queues which exist for that session at the time of # termination will also be closed, disregarding any data held in those # queues. # # -# client_lookup -# --------------- +# lookup +# ------ # # request: -# 'cmd': 'client_lookup' +# 'cmd': 'lookup' # 'arg': 'sid': # # reply: @@ -75,16 +75,16 @@ # notes: # - the request will fail if the session ID is not registered (anymore) # - this request should otherwise always succeed -# - the call returns the same information as `client_register`, but does +# - the call returns the same information as `register`, but does # not alter the state of the client's bridge in any other way. # - the request does not count as a heartbeat # # -# client_unregister -# ----------------- +# unregister +# ---------- # # request: -# 'cmd': 'client_unregister' +# 'cmd': 'unregister' # 'arg': 'sid': # # reply: @@ -98,11 +98,11 @@ # messages still held in the bridges. # # -# client_heartbeat -# ---------------- +# heartbeat +# --------- # # request: -# 'cmd': 'client_heartbeat' +# 'cmd': 'heartbeat' # 'arg': 'sid': # # reply: @@ -140,11 +140,11 @@ def __init__(self): self._monitor_thread.daemon = True self._monitor_thread.start() - self.register_request('client_register', self._client_register) - self.register_request('client_lookup', self._client_lookup) - self.register_request('client_unregister', self._client_unregister) - self.register_request('client_heartbeat', self._client_heartbeat) - self.register_request('service_stop', self._service_stop) + self.register_request('register', self._register) + self.register_request('lookup', self._lookup) + self.register_request('unregister', self._unregister) + self.register_request('heartbeat', self._heartbeat) + self.register_request('service_stop', self._service_stop) # -------------------------------------------------------------------------- @@ -201,7 +201,7 @@ def stop(self): # -------------------------------------------------------------------------- # - def _client_register(self, arg): + def _register(self, arg): sid = arg['sid'] @@ -289,7 +289,7 @@ def _worker(self, sid, q, term): # -------------------------------------------------------------------------- # - def _client_lookup(self, arg): + def _lookup(self, arg): sid = arg['sid'] @@ -302,7 +302,7 @@ def _client_lookup(self, arg): # -------------------------------------------------------------------------- # - def _client_unregister(self, arg): + def _unregister(self, arg): sid = arg['sid'] @@ -319,7 +319,7 @@ def _client_unregister(self, arg): # -------------------------------------------------------------------------- # - def _client_heartbeat(self, arg): + def _heartbeat(self, arg): sid = arg['sid'] now = time.time() diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 5733847f1a..5406014012 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -44,33 +44,42 @@ class Session(rs.Session): # the reporter is an applicataion-level singleton _reporter = None + # a session has one of three possible roles: + # - primary: the session is the first explicit session instance created in + # an RP application. + # - agent: the session is the first session instance created in an RP + # agent. 
+    #   - default: any other session instance, for example as created by
+    #     components in the client or agent module.
+    _PRIMARY = 0
+    _AGENT   = 1
+    _DEFAULT = 2
+

    # --------------------------------------------------------------------------
    #
-    def __init__(self, proxy_host=None, proxy_url=None, uid=None,
-                 cfg=None, _primary=True,
-                 **close_options):
+    def __init__(self, proxy_url=None, proxy_host=None, uid=None,
+                 cfg=None, _role=_PRIMARY, **close_options):
        '''
        Creates a new session.  A new Session instance is created and
        stored in the database.

        Any RP Session will require an RP Proxy to facilitate communication
        between the client machine (i.e., the host where the application created
-        the Session instance) and the target resource (i.e., the host where the
+        this Session instance) and the target resource (i.e., the host where the
        pilot agent/s is/are running and where the workload is being executed).

        A `proxy_url` can be specified which then must point to an RP Proxy
        Service instance which this session can use to establish a communication
        proxy.  Alternatively, a `proxy_host` can be specified - the session will
-        then attempt to start a proxy on that host.  If neither `proxy_url` nor
-        `proxy_host` are specified, the session will check for the environment
-        variables `RADICAL_PILOT_PROXY_URL` and `RADICAL_PILOT_PROXY_HOST` (in
-        that order) and will interpret them as above.  If none of this
-        information is available, the session will instantiate a proxy on the
-        local host.  Note that any proxy instantiated by the session itself will
-        be terminated once the session instance is closed or goes out of scope
-        and is thus garbage collected.  A proxy pointed to by `proxy_url` or by
-        `RADICAL_PILOT_PROXY_URL` will be reusable by other sessions and will
-        not terminate on the session's demise.
+        then attempt to start a proxy service on that host.  If neither
+        `proxy_url` nor `proxy_host` are specified, the session will check for
+        the environment variables `RADICAL_PILOT_PROXY_URL` and
+        `RADICAL_PILOT_PROXY_HOST` (in that order) and will interpret them as
+        above.  If none of this information is available, the session will
+        instantiate a proxy service on the local host.  Note that any proxy
+        service instantiated by the session itself will be terminated once the
+        session instance is closed or goes out of scope and is thus garbage
+        collected and as such should not be used by other session instances.

        Note: an RP proxy will have to be accessible by both the client and the
              target hosts to facilitate communication between both parties.
              That implies access to the respective ports.  Proxies started by
              the session itself will use the first port larger than 10000
              which is found to be free.
@@ -96,14 +105,15 @@ def __init__(self, proxy_host=None, proxy_url=None, uid=None,
            * **cfg** (`str` or `dict`): a named or instantiated configuration
              to be used for the session.

-            * **_primary** (`bool`): only sessions created by the original
-              application process (via `rp.Session()`) will create proxies.
-              Secondary session instances are instantiated internally in
-              processes spawned (directly or indirectly) by the initial session,
-              for example in some of its components.  A secondary session will
-              inherit the original session ID, but will not attempt to create
-              a new proxy - if a proxy is needed, the component will connect to
-              the one created by the primary session.
+            * **_role** (`int`): only `PRIMARY` sessions created by the
+              original application process (via `rp.Session()`) will create
+              proxies and Registry Services.  `AGENT` sessions will also create
+              a Registry but no proxies.
All other `DEFAULT` session instances + are instantiated internally in processes spawned (directly or + indirectly) by the initial session, for example in some of it's + components, or by the RP agent. Those sessions will inherit + the original session ID, but will not attempt to create a new + proxies or registries. If additional key word arguments are provided, they will be used as the default arguments to Session.close(). (This can be useful when the @@ -111,55 +121,70 @@ def __init__(self, proxy_host=None, proxy_url=None, uid=None, automatically at the end of a ``with`` block.) ''' - # TODO: document valid config options (or remove this option) - # * **cfg** (`str` or `dict`): a named or instantiated configuration - # to be used for the session. + # TODO: document valid config options self._close_options = _CloseOptions(close_options) - # NOTE: `name` and `cfg` are overloaded, the user cannot point to - # a predefined config and amend it at the same time. This might - # be ok for the session, but introduces a minor API inconsistency. - name = 'default' - if isinstance(cfg, str): - name = cfg - cfg = None - self._proxy = None + self._role = _role self._closed = False - self._primary = _primary self._t_start = time.time() - self._pmgrs = dict() # map IDs to pmgr instances - self._tmgrs = dict() # map IDs to tmgr instances - self._cmgr = None # only primary sessions have a cmgr + self._proxy = None # proxy client instance + self._reg = None # registry client instance - self._cfg = ru.Config('radical.pilot.session', name=name, cfg=cfg) - self._rcfgs = ru.Config('radical.pilot.resource', name='*', expand=False) + self._pmgrs = dict() # map IDs to pmgr instances + self._tmgrs = dict() # map IDs to tmgr instances + self._cmgr = None # only primary sessions have a cmgr - pwd = os.getcwd() + if uid: self._uid = uid + else : self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE) - if not self._cfg.sid: - if uid: - self._cfg.sid = uid - else: - self._cfg.sid = ru.generate_id('rp.session', - mode=ru.ID_PRIVATE) - if not self._cfg.base: - self._cfg.base = pwd + if self._role == self._PRIMARY: + self._rep.info ('<>ok\n') + + # -------------------------------------------------------------------------- + # + def _init_cfg(self, cfg): + + # NOTE: `cfg_name` and `cfg` are overloaded, the user cannot point to + # a predefined config and amend it at the same time. This might + # be ok for the session, but introduces a minor API inconsistency. + + cfg_name = 'default' + if isinstance(cfg, str): + cfg_name = cfg + cfg = None + + self._cfg = ru.Config('radical.pilot.session', name=cfg_name, cfg=cfg) + self._rcfgs = ru.Config('radical.pilot.resource', name='*', expand=False) + + # ensure we have basic settings + if self._cfg.sid: + assert(self._uid == self._cfg.sid) + + # session path: where to store logfiles etc. + if self._cfg.path: self._path = self._cfg.path + else : self._path = '%s/%s' % (os.getcwd(), self._cfg.sid) # change RU defaults to point logfiles etc. 
to the session sandbox def_cfg = ru.DefaultConfig() - def_cfg.log_dir = self._cfg.path - def_cfg.report_dir = self._cfg.path - def_cfg.profile_dir = self._cfg.path - - self._uid = self._cfg.sid + def_cfg.log_dir = self._path + def_cfg.report_dir = self._path + def_cfg.profile_dir = self._path self._prof = self._get_profiler(name=self._uid) self._rep = self._get_reporter(name=self._uid) @@ -168,8 +193,12 @@ def __init__(self, proxy_host=None, proxy_url=None, uid=None, self._prof.prof('session_start', uid=self._uid) - # now we have config and uid - initialize base class (saga session) - rs.Session.__init__(self, uid=self._uid) + # client sandbox: base for relative staging paths + if self._role == self._PRIMARY: + if not self._cfg.client_sandbox: + self._cfg.client_sandbox = os.getcwd() + else: + assert(self._cfg.client_sandbox) # cache sandboxes etc. self._cache_lock = ru.RLock() @@ -180,9 +209,48 @@ def __init__(self, proxy_host=None, proxy_url=None, uid=None, 'js_shells' : dict(), 'fs_dirs' : dict()} - if self._primary: - self._rep.info ('<>ok\n') # -------------------------------------------------------------------------- @@ -306,7 +438,7 @@ def close(self, **kwargs): if self._proxy: try: self._log.debug("session %s closes service", self._uid) - self._proxy.request('client_unregister', + self._proxy.request('unregister', {'sid': self._uid}) except: pass @@ -386,76 +518,6 @@ def _run_proxy(self): bridge.wait() - # -------------------------------------------------------------------------- - # - def _start_primary(self): - - assert(self._primary) - - # a primary session will create proxy comm channels - self._rep.info ('< Date: Mon, 3 Oct 2022 17:00:28 -0400 Subject: [PATCH 027/171] adding resource_description class --- src/radical/pilot/resource_description.py | 112 ++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 src/radical/pilot/resource_description.py diff --git a/src/radical/pilot/resource_description.py b/src/radical/pilot/resource_description.py new file mode 100644 index 0000000000..634ae0a89f --- /dev/null +++ b/src/radical/pilot/resource_description.py @@ -0,0 +1,112 @@ +# pylint: disable=access-member-before-definition + +__copyright__ = 'Copyright 2013-2021, The RADICAL-Cybertools Team' +__license__ = 'MIT' + +import radical.utils as ru + +DESCRIPTION = 'description' +NOTES = 'notes' +SCHEMAS = 'schemas' +DEFAULT_REMOTE_WORKDIR = 'default_remote_workdir' +DEFAULT_QUEUE = 'default_queue' +RESOURCE_MANAGER = 'default_manager' +AGENT_CONFIG = 'agent_config' +AGENT_SCHEDULER = 'agent_scheduler' +AGENT_SPAWNER = 'agent_spawner' +PRE_BOOTSTRAP_0 = 'pre_bootstrap_0' +PRE_BOOTSTRAP_1 = 'pre_bootstrap_1' +RP_VERSION = 'rp_version' +VIRTENV_MODE = 'virtenv_mode' +VIRTENV_DIST = 'virtenv_dist' +PYTHON_DIST = 'python_dist' +LAUNCH_METHODS = 'launch_methods' +LFS_PATH_PER_NODE = 'lfs_path_per_node' +LFS_SIZE_PER_NODE = 'lfs_size_per_node' +MEM_PER_NODE = 'mem_per_node' +CORES_PER_NODE = 'cores_per_node' +GPUS_PER_NODE = 'gpus_per_node' +BLOCKED_CORES = 'blocked_cores' +BLOCKED_GPUS = 'blocker_gpus' +SYSTEM_ARCHITECTURE = 'system_architecture' + +# ------------------------------------------------------------------------------ +# +class ResourceDescription(ru.TypedDict): + """ + docstrings goes here + """ + + _schema = { + DESCRIPTION : str , + NOTES : str , + SCHEMAS : [dict()] , + + DEFAULT_REMOTE_WORKDIR : str , + DEFAULT_QUEUE : str , + RESOURCE_MANAGER : str , + AGENT_CONFIG : str , + AGENT_SCHEDULER : str , + AGENT_SPAWNER : str , + PRE_BOOTSTRAP_0 : [str] , + 
PRE_BOOTSTRAP_1 : [str] , + RP_VERSION : str , + VIRTENV_MODE : str , + VIRTENV_DIST : str , + PYTHON_DIST : str , + LAUNCH_METHODS : dict(), + LFS_PATH_PER_NODE : str , + LFS_SIZE_PER_NODE : str , + MEM_PER_NODE : int , + CORES_PER_NODE : int , + GPUS_PER_NODE : int , + BLOCKED_CORES : [int] , + BLOCKED_GPUS : [int] , + SYSTEM_ARCHITECTURE : dict(), + } + + _defaults = { + DESCRIPTION : '' , + NOTES : '' , + SCHEMAS : [dict()] , + + DEFAULT_REMOTE_WORKDIR : '' , + DEFAULT_QUEUE : '' , + RESOURCE_MANAGER : '' , + AGENT_CONFIG : 'default' , + AGENT_SCHEDULER : 'CONTINUOUS', + AGENT_SPAWNER : 'POPEN' , + PRE_BOOTSTRAP_0 : list() , + PRE_BOOTSTRAP_1 : list() , + RP_VERSION : '' , + VIRTENV_MODE : '' , + VIRTENV_DIST : '' , + PYTHON_DIST : 'default' , + LAUNCH_METHODS : dict() , + LFS_PATH_PER_NODE : str , + LFS_SIZE_PER_NODE : str , + MEM_PER_NODE : int , + CORES_PER_NODE : int , + GPUS_PER_NODE : int , + BLOCKED_CORES : [int] , + BLOCKED_GPUS : [int] , + SYSTEM_ARCHITECTURE : dict() , + + } + + + # -------------------------------------------------------------------------- + # + def __init__(self, from_dict=None): + + super().__init__(from_dict=from_dict) + + + # -------------------------------------------------------------------------- + # + def _verify(self): + + pass + + +# ------------------------------------------------------------------------------ \ No newline at end of file From 0a8977d65af4ee89ad75c12fe855627c6b70d7fd Mon Sep 17 00:00:00 2001 From: Aymen Alsaadi <27039262+AymenFJA@users.noreply.github.com> Date: Mon, 3 Oct 2022 17:12:03 -0400 Subject: [PATCH 028/171] fix defaults --- src/radical/pilot/resource_description.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/radical/pilot/resource_description.py b/src/radical/pilot/resource_description.py index 634ae0a89f..bb0fc2fbee 100644 --- a/src/radical/pilot/resource_description.py +++ b/src/radical/pilot/resource_description.py @@ -83,11 +83,11 @@ class ResourceDescription(ru.TypedDict): VIRTENV_DIST : '' , PYTHON_DIST : 'default' , LAUNCH_METHODS : dict() , - LFS_PATH_PER_NODE : str , - LFS_SIZE_PER_NODE : str , - MEM_PER_NODE : int , - CORES_PER_NODE : int , - GPUS_PER_NODE : int , + LFS_PATH_PER_NODE : '' , + LFS_SIZE_PER_NODE : '' , + MEM_PER_NODE : 0 , + CORES_PER_NODE : 0 , + GPUS_PER_NODE : 0 , BLOCKED_CORES : [int] , BLOCKED_GPUS : [int] , SYSTEM_ARCHITECTURE : dict() , From 03e61a41b7e4b5b75dfcc725b11c4050cfd6e1d0 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 24 Oct 2022 16:03:33 +0200 Subject: [PATCH 029/171] type fixes --- src/radical/pilot/resource_description.py | 55 +++++++++++------------ 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/src/radical/pilot/resource_description.py b/src/radical/pilot/resource_description.py index bb0fc2fbee..ef46c492b9 100644 --- a/src/radical/pilot/resource_description.py +++ b/src/radical/pilot/resource_description.py @@ -38,37 +38,37 @@ class ResourceDescription(ru.TypedDict): """ _schema = { - DESCRIPTION : str , - NOTES : str , - SCHEMAS : [dict()] , - - DEFAULT_REMOTE_WORKDIR : str , - DEFAULT_QUEUE : str , - RESOURCE_MANAGER : str , - AGENT_CONFIG : str , - AGENT_SCHEDULER : str , - AGENT_SPAWNER : str , - PRE_BOOTSTRAP_0 : [str] , - PRE_BOOTSTRAP_1 : [str] , - RP_VERSION : str , - VIRTENV_MODE : str , - VIRTENV_DIST : str , - PYTHON_DIST : str , - LAUNCH_METHODS : dict(), - LFS_PATH_PER_NODE : str , - LFS_SIZE_PER_NODE : str , - MEM_PER_NODE : int , - CORES_PER_NODE : int , - GPUS_PER_NODE : int , - BLOCKED_CORES : [int] 
, - BLOCKED_GPUS : [int] , - SYSTEM_ARCHITECTURE : dict(), + DESCRIPTION : str , + NOTES : str , + SCHEMAS : [None] , # FIXME: define sub-schema + + DEFAULT_REMOTE_WORKDIR : str , + DEFAULT_QUEUE : str , + RESOURCE_MANAGER : str , + AGENT_CONFIG : str , + AGENT_SCHEDULER : str , + AGENT_SPAWNER : str , + PRE_BOOTSTRAP_0 : [str] , + PRE_BOOTSTRAP_1 : [str] , + RP_VERSION : str , + VIRTENV_MODE : str , + VIRTENV_DIST : str , + PYTHON_DIST : str , + LAUNCH_METHODS : {str: None}, # FIXME: define sub-schema + LFS_PATH_PER_NODE : str , + LFS_SIZE_PER_NODE : str , + MEM_PER_NODE : int , + CORES_PER_NODE : int , + GPUS_PER_NODE : int , + BLOCKED_CORES : [int] , + BLOCKED_GPUS : [int] , + SYSTEM_ARCHITECTURE : {str:str} , } _defaults = { DESCRIPTION : '' , NOTES : '' , - SCHEMAS : [dict()] , + SCHEMAS : list() , DEFAULT_REMOTE_WORKDIR : '' , DEFAULT_QUEUE : '' , @@ -91,7 +91,6 @@ class ResourceDescription(ru.TypedDict): BLOCKED_CORES : [int] , BLOCKED_GPUS : [int] , SYSTEM_ARCHITECTURE : dict() , - } @@ -109,4 +108,4 @@ def _verify(self): pass -# ------------------------------------------------------------------------------ \ No newline at end of file +# ------------------------------------------------------------------------------ From 4e2fe8c5a2d9fbc5e2456e943d47e706576ecdf0 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 24 Oct 2022 16:12:00 +0200 Subject: [PATCH 030/171] sub-schema for `schemas` --- src/radical/pilot/resource_description.py | 32 ++++++++++++++++++----- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/radical/pilot/resource_description.py b/src/radical/pilot/resource_description.py index ef46c492b9..feb8f35606 100644 --- a/src/radical/pilot/resource_description.py +++ b/src/radical/pilot/resource_description.py @@ -1,4 +1,3 @@ -# pylint: disable=access-member-before-definition __copyright__ = 'Copyright 2013-2021, The RADICAL-Cybertools Team' __license__ = 'MIT' @@ -7,7 +6,10 @@ DESCRIPTION = 'description' NOTES = 'notes' +DEFAULT_SCHEMA = 'default_schema' SCHEMAS = 'schemas' +JOB_MANAGER_ENDPOINT = 'job_manager_endpoint' +FILESYSTEM_ENDPOINT = 'filesystem_endpoint' DEFAULT_REMOTE_WORKDIR = 'default_remote_workdir' DEFAULT_QUEUE = 'default_queue' RESOURCE_MANAGER = 'default_manager' @@ -30,17 +32,34 @@ BLOCKED_GPUS = 'blocker_gpus' SYSTEM_ARCHITECTURE = 'system_architecture' + +# ------------------------------------------------------------------------------ +# +class AccessSchema(ru.TypedDict): + + _schema = { + JOB_MANAGER_ENDPOINT: str, + FILESYSTEM_ENDPOINT : str, + } + + _defaults = { + JOB_MANAGER_ENDPOINT: None, + FILESYSTEM_ENDPOINT : None, + } + + # ------------------------------------------------------------------------------ # class ResourceDescription(ru.TypedDict): - """ + ''' docstrings goes here - """ + ''' _schema = { DESCRIPTION : str , NOTES : str , - SCHEMAS : [None] , # FIXME: define sub-schema + DEFAULT_SCHEMA : str , + SCHEMAS : {str: AccessSchema}, DEFAULT_REMOTE_WORKDIR : str , DEFAULT_QUEUE : str , @@ -54,7 +73,7 @@ class ResourceDescription(ru.TypedDict): VIRTENV_MODE : str , VIRTENV_DIST : str , PYTHON_DIST : str , - LAUNCH_METHODS : {str: None}, # FIXME: define sub-schema + LAUNCH_METHODS : {str: None}, LFS_PATH_PER_NODE : str , LFS_SIZE_PER_NODE : str , MEM_PER_NODE : int , @@ -62,12 +81,13 @@ class ResourceDescription(ru.TypedDict): GPUS_PER_NODE : int , BLOCKED_CORES : [int] , BLOCKED_GPUS : [int] , - SYSTEM_ARCHITECTURE : {str:str} , + SYSTEM_ARCHITECTURE : {str: str} , } _defaults = { DESCRIPTION : '' , NOTES : '' , + 
DEFAULT_SCHEMA : '' , SCHEMAS : list() , DEFAULT_REMOTE_WORKDIR : '' , From 4550adecabb8c2d8476d28002241ead16aaeffc1 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sat, 29 Oct 2022 11:49:46 +0200 Subject: [PATCH 031/171] fix botched merge --- src/radical/pilot/session.py | 91 +++++++++--------------------------- 1 file changed, 22 insertions(+), 69 deletions(-) diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index dc8cbb8241..8cba18297a 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -407,7 +407,7 @@ def close(self, **kwargs): if self._closed: return - if self._primary: + if self._role == self._PRIMARY: self._rep.info('closing session %s' % self._uid) self._log.debug("session %s closing", self._uid) @@ -456,29 +456,18 @@ def close(self, **kwargs): self._prof.prof("session_fetch_start", uid=self._uid) self._log.debug('start download') tgt = self._cfg.base -<<<<<<< HEAD # FIXME: MongoDB - # self.fetch_json (tgt='%s/%s' % (tgt, self.uid)) -======= - self.fetch_json (tgt=tgt) ->>>>>>> devel + # self.fetch_json (tgt=tgt) self.fetch_profiles(tgt=tgt) self.fetch_logfiles(tgt=tgt) self._prof.prof("session_fetch_stop", uid=self._uid) -<<<<<<< HEAD if self._primary: self._t_stop = time.time() self._rep.info('<>ok\n') -======= - if self.closed and self.created: - self._rep.info('<>ok\n') ->>>>>>> devel # dump json json = {'session' : self.as_dict(), @@ -591,39 +580,15 @@ def uid(self): # -------------------------------------------------------------------------- # @property -<<<<<<< HEAD def path(self): return self._cfg.path -======= - def created(self): - '''Returns the UTC date and time the session was created. - ''' - if self._dbs: ret = self._dbs.created - else : ret = None - - if ret: - return float(ret) ->>>>>>> devel # -------------------------------------------------------------------------- # @property -<<<<<<< HEAD def proxy_url(self): return self._cfg.proxy_url -======= - def connected(self): - ''' - Return time when the session connected to the DB - ''' - - if self._dbs: ret = self._dbs.connected - else : ret = None - - if ret: - return float(ret) ->>>>>>> devel # -------------------------------------------------------------------------- @@ -636,21 +601,9 @@ def cfg(self): # -------------------------------------------------------------------------- # @property -<<<<<<< HEAD def cmgr(self): assert self._primary return self._cmgr -======= - def closed(self): - ''' - Returns the time of closing - ''' - if self._dbs: ret = self._dbs.closed - else : ret = None - - if ret: - return float(ret) ->>>>>>> devel # -------------------------------------------------------------------------- @@ -716,16 +669,16 @@ def _register_pmgr(self, pmgr): self._pmgrs[pmgr.uid] = pmgr - # -------------------------------------------------------------------------- - # - def _reconnect_pmgr(self, pmgr): - - if not self._dbs.get_pmgrs(pmgr_ids=pmgr.uid): - raise ValueError('could not reconnect to pmgr %s' % pmgr.uid) - - self._pmgrs[pmgr.uid] = pmgr - - + # # -------------------------------------------------------------------------- + # # + # def _reconnect_pmgr(self, pmgr): + # + # if not self._dbs.get_pmgrs(pmgr_ids=pmgr.uid): + # raise ValueError('could not reconnect to pmgr %s' % pmgr.uid) + # + # self._pmgrs[pmgr.uid] = pmgr + # + # # -------------------------------------------------------------------------- # def list_pilot_managers(self): @@ -775,16 +728,16 @@ def _register_tmgr(self, tmgr): self._tmgrs[tmgr.uid] = tmgr - # 
-------------------------------------------------------------------------- - # - def _reconnect_tmgr(self, tmgr): - - if not self._dbs.get_tmgrs(tmgr_ids=tmgr.uid): - raise ValueError('could not reconnect to tmgr %s' % tmgr.uid) - - self._tmgrs[tmgr.uid] = tmgr - - + # # -------------------------------------------------------------------------- + # # + # def _reconnect_tmgr(self, tmgr): + # + # if not self._dbs.get_tmgrs(tmgr_ids=tmgr.uid): + # raise ValueError('could not reconnect to tmgr %s' % tmgr.uid) + # + # self._tmgrs[tmgr.uid] = tmgr + # + # # -------------------------------------------------------------------------- # def list_task_managers(self): From 6ec4d748575a98bd238c02badbe0ef061073e0a1 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 30 Jan 2023 14:43:15 +0100 Subject: [PATCH 032/171] snapshot --- bin/radical-pilot-bridge | 52 +++++----- src/radical/pilot/agent/agent_0.py | 12 ++- src/radical/pilot/agent/agent_n.py | 12 ++- src/radical/pilot/agent/executing/popen.py | 1 + src/radical/pilot/db/database.py | 3 +- src/radical/pilot/pilot_manager.py | 3 +- src/radical/pilot/session.py | 71 ++++++++++---- src/radical/pilot/task_manager.py | 2 +- src/radical/pilot/utils/component.py | 108 ++++++++------------- src/radical/pilot/utils/session.py | 2 +- src/radical/pilot/worker/update.py | 9 +- 11 files changed, 137 insertions(+), 138 deletions(-) diff --git a/bin/radical-pilot-bridge b/bin/radical-pilot-bridge index b3b6fa63fe..1f6f417f61 100755 --- a/bin/radical-pilot-bridge +++ b/bin/radical-pilot-bridge @@ -15,7 +15,7 @@ import radical.utils as ru # ------------------------------------------------------------------------------ # -def main(cfg): +def main(sid, reg_addr, uid): ''' This thin wrapper starts a ZMQ bridge. It expects a single argument: a config to use for the bridge's configuration. The config must contain: @@ -53,8 +53,9 @@ def main(cfg): The config file may contain other entries which are passed to the bridge and are interpreted by the bridge implementation. - After startup, the bridge's communication endpoint URLs are stored in a - file `$uid.cfg`, in the form (shown for pubsub and queue type bridges): + After startup, the bridge's communication endpoint URLs are stored in the + sessions registry under `bridges.`, in the form (shown for + pubsub and queue type bridges): { 'uid': '$bridge.uid', @@ -72,9 +73,9 @@ def main(cfg): can obtain the respective bridge addresses automatically. 
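+    As a minimal sketch (assuming a pubsub bridge named `control_pubsub` was
+    started -- the channel name is illustrative, not guaranteed), such a
+    lookup could be:
+
+        reg   = ru.zmq.RegistryClient(url=reg_addr, pwd=sid)
+        addrs = reg['bridges.control_pubsub']
+        # addrs -> {'pub': '$addr_pub', 'sub': '$addr_sub'}
+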
This also holds for command line tools like: - > radical-pilot-bridge command.cfg [1] - > radical-pilot-sub command foo & [2] - > radical-pilot-pub command foo bar [3] + > radical-pilot-bridge sid reg_addr test_pubsub.0000 [1] + > radical-pilot-sub sid reg_addr test_pubsub.0000 foo & [2] + > radical-pilot-pub sid reg_addr test_pubsub.0000 foo bar [3] [1] establishes the pubsub channel 'command' [2] connect to the command channel, subscribe for topic `foo` @@ -82,31 +83,32 @@ def main(cfg): ''' # basic setup: cfg, logger and profiler - log = ru.Logger(name=cfg.uid, ns='radical.pilot', path=cfg.path) - prof = ru.Profiler(name=cfg.uid, ns='radical.pilot', path=cfg.path) + log = ru.Logger(name=uid, ns='radical.pilot') + prof = ru.Profiler(name=uid, ns='radical.pilot') try: - prof.prof('bridge_start', uid=cfg.uid) + prof.prof('bridge_start', uid=uid) prof.disable() - wrapped_main(cfg, log, prof) + wrapped_main(sid, reg_addr, uid, log, prof) finally: prof.enable() - prof.prof('bridge_stop', uid=cfg.uid) + prof.prof('bridge_stop', uid=uid) -def wrapped_main(cfg, log, prof): +# ------------------------------------------------------------------------------ +# +def wrapped_main(sid, reg_addr, uid, log, prof): term = mt.Event() + reg = ru.zmq.RegistryClient(url=reg_addr, pwd=sid) - spt.setproctitle('rp.%s' % cfg.uid) + spt.setproctitle('rp.%s' % uid) # create the bridge, store connection addresses in FS, and begin to work - bridge = ru.zmq.Bridge.create(cfg) + bridge = ru.zmq.Bridge.create(sid, reg_addr, uid) - ru.write_json('%s/%s.cfg' % (cfg.path, cfg.uid), - {'uid' : cfg.uid, - bridge.type_in : str(bridge.addr_in), - bridge.type_out: str(bridge.addr_out)}) + reg['bridges.%s' % uid] = {bridge.type_in : str(bridge.addr_in), + bridge.type_out: str(bridge.addr_out)} bridge.start() @@ -151,17 +153,17 @@ def wrapped_main(cfg, log, prof): # if __name__ == "__main__": - if len(sys.argv) != 2: + if len(sys.argv) != 4: sys.stderr.write('error: invalid arguments\n' - 'usage: %s \n' % sys.argv[0]) + 'usage: %s \n' % sys.argv[0]) raise RuntimeError('invalid arguments: %s' % sys.argv) - fname = sys.argv[1] - cfg = ru.Config(path=fname) - path = '%s/%s' % (cfg.path, cfg.uid) + sid = sys.argv[1] + reg_addr = sys.argv[2] + uid = sys.argv[3] - ru.daemonize(main=main, args=[cfg], stdout='%s.out' % path, - stderr='%s.err' % path) + ru.daemonize(main=main, args=[sid, reg_addr, uid], + stdout='%s.out' % uid, stderr='%s.err' % uid) sys.exit(0) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 840338e0aa..359e324887 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -43,6 +43,7 @@ def __init__(self, cfg, session): self._uid = 'agent.0' self._cfg = cfg self._pid = cfg.pid + self._sid = cfg.sid self._pmgr = cfg.pmgr self._pwd = cfg.pilot_sandbox self._session = session @@ -60,9 +61,10 @@ def __init__(self, cfg, session): reg_uid = 'radical.pilot.reg.%s' % self._uid self._reg_service = ru.zmq.Registry(uid=reg_uid) self._reg_service.start() + self._reg_addr = self._reg_service.addr # let all components know where to look for the registry - self._cfg['reg_addr'] = self._reg_service.addr + self._cfg['reg_addr'] = self._reg_addr # connect to MongoDB for state push/pull self._connect_db() @@ -76,7 +78,7 @@ def __init__(self, cfg, session): # expose heartbeat channel to sub-agents, bridges and components, # and start those - self._cmgr = rpu.ComponentManager(self._cfg) + self._cmgr = rpu.ComponentManager(self._reg_addr) self._cfg.heartbeat = 
self._cmgr.cfg.heartbeat self._cmgr.start_bridges() @@ -147,7 +149,7 @@ def _connect_db(self): self._cfg.dburl = str(dburl) self._dbs = DBSession(sid=self._cfg.sid, dburl=self._cfg.dburl, - cfg=self._cfg, log=self._log) + log=self._log) # -------------------------------------------------------------------------- # @@ -405,7 +407,7 @@ def _start_services(self): # spawn the sub-agent cmdline = './%s' % ls_name - self._log.info('create services: %s' % cmdline) + self._log.info('create services: %s', cmdline) ru.sh_callout_bg(cmdline, stdout='services.out', stderr='services.err') self._log.debug('services started done') @@ -523,7 +525,7 @@ def _start_sub_agents(self): # spawn the sub-agent cmdline = launch_script - self._log.info ('create sub-agent %s: %s' % (sa, cmdline)) + self._log.info ('create sub-agent %s: %s', sa, cmdline) ru.sh_callout_bg(cmdline, stdout='%s.out' % sa, stderr='%s.err' % sa) diff --git a/src/radical/pilot/agent/agent_n.py b/src/radical/pilot/agent/agent_n.py index fbe906305b..0b91533d9b 100644 --- a/src/radical/pilot/agent/agent_n.py +++ b/src/radical/pilot/agent/agent_n.py @@ -20,10 +20,12 @@ class Agent_n(rpu.Worker): # def __init__(self, cfg, session): - self._cfg = cfg - self._pid = cfg.pid - self._pmgr = cfg.pmgr - self._pwd = cfg.pilot_sandbox + self._cfg = cfg + self._pid = cfg.pid + self._pmgr = cfg.pmgr + self._pwd = cfg.pilot_sandbox + self._sid = cfg.sid + self._reg_addr = cfg.reg_addr # log / profile via session until component manager is initialized self._session = session @@ -39,7 +41,7 @@ def __init__(self, cfg, session): # expose heartbeat channel to sub-agents, bridges and components, # and start those - self._cmgr = rpu.ComponentManager(self._cfg) + self._cmgr = rpu.ComponentManager(self._reg_addr) self._cfg.heartbeat = self._cmgr.cfg.heartbeat self._cmgr.start_bridges() diff --git a/src/radical/pilot/agent/executing/popen.py b/src/radical/pilot/agent/executing/popen.py index e99ce8e803..d2ca5a3659 100644 --- a/src/radical/pilot/agent/executing/popen.py +++ b/src/radical/pilot/agent/executing/popen.py @@ -567,6 +567,7 @@ def _get_rp_env(self, task): # ret += 'export RP_LFS="%s"\n' % self.lfs ret += 'export RP_GTOD="%s"\n' % self.gtod ret += 'export RP_PROF="%s"\n' % self.prof + ret += 'export RP_REGISTRY_URL="%s"\n' % self.cfg.reg_addr if self._prof.enabled: ret += 'export RP_PROF_TGT="%s/%s.prof"\n' % (sbox, tid) diff --git a/src/radical/pilot/db/database.py b/src/radical/pilot/db/database.py index 9b0592dfaf..fc1823bc89 100644 --- a/src/radical/pilot/db/database.py +++ b/src/radical/pilot/db/database.py @@ -18,7 +18,7 @@ class DBSession(object): # -------------------------------------------------------------------------- # - def __init__(self, sid, dburl, cfg, log, connect=True): + def __init__(self, sid, dburl, log, connect=True): ''' Creates a new session @@ -72,7 +72,6 @@ def __init__(self, sid, dburl, cfg, log, connect=True): self._c.insert({'type' : 'session', '_id' : sid, 'uid' : sid, - 'cfg' : cfg.as_dict(), 'created' : self._created, 'connected' : self._connected}) self._can_remove = True diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py index 591d5df154..5992df3872 100644 --- a/src/radical/pilot/pilot_manager.py +++ b/src/radical/pilot/pilot_manager.py @@ -128,6 +128,7 @@ def __init__(self, session, uid=None, cfg='default'): cfg.base = session.base cfg.path = session.path cfg.dburl = session.dburl + cfg.reg_addr = session.reg_addr cfg.heartbeat = session.cfg.heartbeat cfg.client_sandbox = 
session._get_client_sandbox() @@ -138,7 +139,7 @@ def __init__(self, session, uid=None, cfg='default'): self._rep.info('< Date: Wed, 12 Apr 2023 16:14:55 +0200 Subject: [PATCH 033/171] superficial fixes --- src/radical/pilot/pilot_manager.py | 1 - src/radical/pilot/session.py | 34 +++++++++++++++--------------- src/radical/pilot/task_manager.py | 1 - 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py index 68db863afb..b856c4dd8f 100644 --- a/src/radical/pilot/pilot_manager.py +++ b/src/radical/pilot/pilot_manager.py @@ -125,7 +125,6 @@ def __init__(self, session, uid=None, cfg='default'): cfg.uid = self._uid cfg.owner = self._uid cfg.sid = session.uid - cfg.base = session.base cfg.path = session.path cfg.heartbeat = session.cfg.heartbeat cfg.client_sandbox = session._get_client_sandbox() diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index e1214d02c9..6ed33e651a 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -129,8 +129,8 @@ def __init__(self, proxy_url=None, proxy_host=None, uid=None, self._closed = False self._t_start = time.time() - self._proxy = None # proxy client instance - self._reg = None # registry client instance + self._proxy = None # proxy client instance + self._reg = None # registry client instance self._pmgrs = dict() # map IDs to pmgr instances self._tmgrs = dict() # map IDs to tmgr instances @@ -140,6 +140,14 @@ def __init__(self, proxy_url=None, proxy_host=None, uid=None, if uid: self._uid = uid else : self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE) + self._init_cfg(cfg) + self._init_registry() + self._init_proxy(proxy_url, proxy_host) + + if self._role == self._PRIMARY: + self._rep.info ('<>ok\n') - # dump json json = {'session' : self.as_dict(), 'pmgr' : list(), @@ -503,6 +498,11 @@ def close(self, **kwargs): tgt = '%s/%s.json' % (self.path, self.uid) ru.write_json(json, tgt) + if self.closed and self.created: + self._rep.info('<>ok\n') + # -------------------------------------------------------------------------- # @@ -574,7 +574,7 @@ def __str__(self): # @property def primary(self): - return self._primary + return self._role == self._PRIMARY # -------------------------------------------------------------------------- diff --git a/src/radical/pilot/task_manager.py b/src/radical/pilot/task_manager.py index f994538250..5b4d7b69de 100644 --- a/src/radical/pilot/task_manager.py +++ b/src/radical/pilot/task_manager.py @@ -137,7 +137,6 @@ def __init__(self, session, cfg='default', scheduler=None, uid=None): cfg.uid = self._uid cfg.owner = self._uid cfg.sid = session.uid - cfg.base = session.base cfg.path = session.path cfg.heartbeat = session.cfg.heartbeat cfg.client_sandbox = session._get_client_sandbox() From 01f48e51871de21ffbb3d5f07f2e56f2147e1fbc Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sat, 22 Apr 2023 15:01:32 +0200 Subject: [PATCH 034/171] snapshot --- bin/radical-pilot-agent | 2 +- bin/radical-pilot-component | 2 +- bin/radical-pilot-run-session | 2 +- bin/radical-pilot-worker | 136 ++++++++++++++++ src/radical/pilot/pilot_manager.py | 1 + src/radical/pilot/raptor/master.py | 3 +- src/radical/pilot/session.py | 231 ++++++++++++++++----------- src/radical/pilot/task_manager.py | 1 + src/radical/pilot/utils/component.py | 10 +- 9 files changed, 289 insertions(+), 99 deletions(-) create mode 100755 bin/radical-pilot-worker diff --git a/bin/radical-pilot-agent b/bin/radical-pilot-agent index 
33cec7c9a1..676f806770 100755 --- a/bin/radical-pilot-agent +++ b/bin/radical-pilot-agent @@ -46,7 +46,7 @@ def bootstrap_3(aid): cfg.base = os.getcwd() # start a non-primary session (the agents will own their cmgrs) - session = rp.Session(cfg=cfg, _primary=False) + session = rp.Session(cfg=cfg, _role=rp.Session._AGENT) if aid == 'agent.0': agent = rp.Agent_0(cfg, session) else : agent = rp.Agent_n(cfg, session) diff --git a/bin/radical-pilot-component b/bin/radical-pilot-component index 778fb4037b..e9610ef682 100755 --- a/bin/radical-pilot-component +++ b/bin/radical-pilot-component @@ -75,7 +75,7 @@ def wrapped_main(cfg, log, prof): spt.setproctitle('rp.%s' % cfg.uid) # start a non-primary session - session = rp.Session(cfg=cfg, _primary=False) + session = rp.Session(cfg=cfg, _role=rp.Session._DEFAULT) # create the component and begin to work comp = rp.utils.Component.create(cfg, session) diff --git a/bin/radical-pilot-run-session b/bin/radical-pilot-run-session index 1e4bf96850..a45b2163b9 100755 --- a/bin/radical-pilot-run-session +++ b/bin/radical-pilot-run-session @@ -24,7 +24,7 @@ def run_record(rec): dburl = s_dict.get('dburl') rep.info('session dburl: %s' % dburl) - session = rp.Session(database_url=dburl) + session = rp.Session() rep.ok('session uid : %s' % session.uid) pmgr = rp.PilotManager(session=session) diff --git a/bin/radical-pilot-worker b/bin/radical-pilot-worker new file mode 100755 index 0000000000..b16c2053e1 --- /dev/null +++ b/bin/radical-pilot-worker @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 + + +import sys +import time + +import threading as mt +import setproctitle as spt + +import radical.utils as ru +import radical.pilot as rp + +dh = ru.DebugHelper() + + +# ------------------------------------------------------------------------------ +# +def main(cfg): + ''' + This thin wrapper starts an RCT task overlay worker. It expects a single + argument: a config to use for the worker's configuration. + That config must contain: + + - 'uid' : UID of worker instance (unique to the hosting session) + - 'path': sandbox for log files etc. + + If the config contains a `heartbeat` section, that section must be formatted + as follows: + + { + 'from' : 'uid', + 'pub' : 'addr_pub', + 'sub' : 'addr_sub', + 'interval': , + 'timeout' : + } + + If that section exists, heartbeats are used to manage the worker's lifetime: + the lifetime of this worker is then dependent on receiving heartbeats from + the given `uid`: after `timeout` seconds of no heartbeats arriving, the + worker will terminate. The worker itself will publish heartbeats every + `interval` seconds on the heartbeat channel under its own uid. + + If the heartbeat section is not present in the config file, the worker's + lifetime is expected to be explicitly managed, i.e., that this wrapper + process hosting the worker is terminated externally. + + The config file may contain other entries which are passed to the worker and + are interpreted by the component implementation. 
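+
+    As a rough sketch, a worker config honoring the entries above could look
+    like this (all values are illustrative only):
+
+        {
+            'uid'      : 'worker.0000',
+            'path'     : '/tmp/session.0000',
+            'cmgr'     : 'cmgr.0000',
+            'heartbeat': {
+                'from'    : 'cmgr.0000',
+                'pub'     : 'tcp://127.0.0.1:10002',
+                'sub'     : 'tcp://127.0.0.1:10003',
+                'interval': 1.0,
+                'timeout' : 60.0
+            }
+        }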
+
+    '''
+
+    # basic setup: cfg, logger and profiler
+    log  = ru.Logger(name=cfg.uid, ns='radical.pilot', path=cfg.path)
+    prof = ru.Profiler(name=cfg.uid, ns='radical.pilot', path=cfg.path)
+
+    try:
+        prof.prof('worker_start', uid=cfg.uid)
+        prof.disable()
+        wrapped_main(cfg, log, prof)
+    except:
+        prof.enable()
+        prof.prof('worker_fail', uid=cfg.uid)
+    finally:
+        prof.enable()
+        prof.prof('worker_stop', uid=cfg.uid)
+
+
+def wrapped_main(cfg, log, prof):
+
+    term = mt.Event()
+
+    spt.setproctitle('rp.%s' % cfg.uid)
+
+    # start a non-primary session
+    session = rp.Session(cfg=cfg, _role=rp.Session._DEFAULT)
+
+    # create the component and begin to work
+    worker = rp.utils.Component.create(cfg, session)
+    worker.start()
+
+    # component runs - send heartbeats so that cmgr knows about it
+    hb_pub = ru.zmq.Publisher('heartbeat', cfg.heartbeat.addr_pub)
+
+    def hb_beat_cb():
+        hb_pub.put('heartbeat', msg={'uid': cfg.uid})
+
+    def hb_term_cb(hb_uid):
+        worker.stop()
+        term.set()
+        return None
+
+    hb = ru.Heartbeat(uid=cfg.uid,
+                      timeout=cfg.heartbeat.timeout,
+                      interval=cfg.heartbeat.interval,
+                      beat_cb=hb_beat_cb,
+                      term_cb=hb_term_cb,
+                      log=log)
+    hb.start()
+
+    # register cmgr heartbeat by beating once
+    hb.beat(uid=cfg.cmgr)
+
+    # record cmgr heartbeats
+    def hb_sub_cb(topic, msg):
+        if msg['uid'] == cfg.cmgr:
+            hb.beat(uid=cfg.cmgr)
+
+    ru.zmq.Subscriber('heartbeat', cfg.heartbeat.addr_sub,
+                      topic='heartbeat', cb=hb_sub_cb,
+                      log=log, prof=prof)
+
+    # all is set up - we can sit idle 'til end of time.
+    while not term.is_set():
+        time.sleep(1)
+
+
+# ------------------------------------------------------------------------------
+#
+if __name__ == "__main__":
+
+    if len(sys.argv) != 2:
+        sys.stderr.write('error: invalid arguments\n'
+                         'usage: %s <cfg_file>\n' % sys.argv[0])
+        raise RuntimeError('invalid arguments: %s' % sys.argv)
+
+    fname = sys.argv[1]
+    cfg   = ru.Config(path=fname)
+    path  = '%s/%s' % (cfg.path, cfg.uid)
+
+    # NOTE: this script runs as an RP task and will *not* daemonize
+
+    main(cfg)
+
+
+# ------------------------------------------------------------------------------
+
diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py
index b856c4dd8f..90d8d5041b 100644
--- a/src/radical/pilot/pilot_manager.py
+++ b/src/radical/pilot/pilot_manager.py
@@ -126,6 +126,7 @@ def __init__(self, session, uid=None, cfg='default'):
         cfg.owner           = self._uid
         cfg.sid             = session.uid
         cfg.path            = session.path
+        cfg.reg_addr        = session.cfg.reg_addr
         cfg.heartbeat       = session.cfg.heartbeat
 
         cfg.client_sandbox  = session._get_client_sandbox()
diff --git a/src/radical/pilot/raptor/master.py b/src/radical/pilot/raptor/master.py
index 5b4ebae473..eb3107a072 100644
--- a/src/radical/pilot/raptor/master.py
+++ b/src/radical/pilot/raptor/master.py
@@ -57,7 +57,7 @@ def __init__(self, cfg=None):
         self._hb_timeout = 15  # consider worker dead after 15 seconds
 
         cfg = self._get_config(cfg)
-        self._session = Session(cfg=cfg, uid=cfg.sid, _primary=False)
+        self._session = Session(cfg=cfg, uid=cfg.sid, _role=Session._DEFAULT)
 
         rpu.Component.__init__(self, cfg, self._session)
 
diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py
index 9122c665b7..51a5bf861d 100644
--- a/src/radical/pilot/session.py
+++ b/src/radical/pilot/session.py
@@ -51,9 +51,9 @@ class Session(rs.Session):
     # agent.
     # - default: any other session instance, for example such as created by
    #   components in the client or agent module.
- _PRIMARY = 0 - _AGENT = 1 - _DEFAULT = 2 + _PRIMARY = 'primary' + _AGENT = 'agent' + _DEFAULT = 'default' # -------------------------------------------------------------------------- # @@ -126,6 +126,7 @@ def __init__(self, proxy_url=None, proxy_host=None, uid=None, self._close_options = _CloseOptions(close_options) self._role = _role + print(1, self._role) self._closed = False self._t_start = time.time() @@ -136,11 +137,10 @@ def __init__(self, proxy_url=None, proxy_host=None, uid=None, self._tmgrs = dict() # map IDs to tmgr instances self._cmgr = None # only primary sessions have a cmgr - if uid: self._uid = uid else : self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE) - self._init_cfg(cfg) + self._init_cfg(uid, cfg) self._init_registry() self._init_proxy(proxy_url, proxy_host) @@ -158,15 +158,19 @@ def __init__(self, proxy_url=None, proxy_host=None, uid=None, # now we have config and uid - initialize base class (saga session) rs.Session.__init__(self, uid=self._uid) + # start bridges and components + self._init_components() + # at this point we have a bridge connection, logger, etc, and are done self._prof.prof('session_ok', uid=self._uid) if self._role == self._PRIMARY: self._rep.ok('>>ok\n') + # -------------------------------------------------------------------------- # - def _init_cfg(self, cfg): + def _init_cfg(self, uid, cfg): # NOTE: `cfg_name` and `cfg` are overloaded, the user cannot point to # a predefined config and amend it at the same time. This might @@ -180,13 +184,19 @@ def _init_cfg(self, cfg): self._cfg = ru.Config('radical.pilot.session', name=cfg_name, cfg=cfg) self._rcfgs = ru.Config('radical.pilot.resource', name='*', expand=False) - # ensure we have basic settings - if self._cfg.sid: - assert(self._uid == self._cfg.sid) + if uid and self._cfg.sid: + assert uid == self._cfg.sid + + if uid: + self._uid = uid + elif self._cfg.sid: + self._uid = self._cfg.sid + else: + self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE) # session path: where to store logfiles etc. if self._cfg.path: self._path = self._cfg.path - else : self._path = '%s/%s' % (os.getcwd(), self._cfg.sid) + else : self._path = '%s/%s' % (os.getcwd(), self.uid) # change RU defaults to point logfiles etc. 
to the session sandbox def_cfg = ru.DefaultConfig() @@ -199,15 +209,36 @@ def _init_cfg(self, cfg): self._log = self._get_logger (name=self._uid, level=self._cfg.get('debug')) + self._log.debug('=== 1 %s', self._role) + self._log.debug('=== Session(%s, %s) [%s]', uid, self._role, + self._cfg.sid) + self._log.debug('\n'.join(ru.get_stacktrace())) self._prof.prof('session_start', uid=self._uid) + if self._role == self._PRIMARY: + self._rep.info ('< Date: Sun, 23 Apr 2023 14:13:26 +0200 Subject: [PATCH 035/171] snap --- bin/radical-pilot-bridge | 25 ++++++++++++++----------- src/radical/pilot/session.py | 3 --- src/radical/pilot/utils/component.py | 5 ++++- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/bin/radical-pilot-bridge b/bin/radical-pilot-bridge index 1f6f417f61..b21581dd57 100755 --- a/bin/radical-pilot-bridge +++ b/bin/radical-pilot-bridge @@ -82,7 +82,7 @@ def main(sid, reg_addr, uid): [3] connect to the command channel, send messages for topic `foo` ''' - # basic setup: cfg, logger and profiler + # basic setup: logger and profiler log = ru.Logger(name=uid, ns='radical.pilot') prof = ru.Profiler(name=uid, ns='radical.pilot') @@ -90,6 +90,7 @@ def main(sid, reg_addr, uid): prof.prof('bridge_start', uid=uid) prof.disable() wrapped_main(sid, reg_addr, uid, log, prof) + finally: prof.enable() prof.prof('bridge_stop', uid=uid) @@ -102,10 +103,12 @@ def wrapped_main(sid, reg_addr, uid, log, prof): term = mt.Event() reg = ru.zmq.RegistryClient(url=reg_addr, pwd=sid) + hb_cfg = reg['hb_cfg'] + spt.setproctitle('rp.%s' % uid) # create the bridge, store connection addresses in FS, and begin to work - bridge = ru.zmq.Bridge.create(sid, reg_addr, uid) + bridge = ru.zmq.Bridge.create(uid) reg['bridges.%s' % uid] = {bridge.type_in : str(bridge.addr_in), bridge.type_out: str(bridge.addr_out)} @@ -113,34 +116,34 @@ def wrapped_main(sid, reg_addr, uid, log, prof): bridge.start() # bridge runs - send heartbeats so that cmgr knows about it - hb_pub = ru.zmq.Publisher('heartbeat', cfg.heartbeat.addr_pub, + hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.heartbeat.addr_pub, log=log, prof=prof) def hb_beat_cb(): - hb_pub.put('heartbeat', msg={'uid': cfg.uid}) + hb_pub.put('heartbeat', msg={'uid': uid}) def hb_term_cb(hb_uid): bridge.stop() term.set() return None - hb = ru.Heartbeat(uid=cfg.uid, - timeout=cfg.heartbeat.timeout, - interval=cfg.heartbeat.interval, + hb = ru.Heartbeat(uid=hb_cfg.uid, + timeout=hb_cfg.heartbeat.timeout, + interval=hb_cfg.heartbeat.interval, beat_cb=hb_beat_cb, term_cb=hb_term_cb, log=log) hb.start() # register cmgr heartbeat by beating once - hb.beat(uid=cfg.cmgr) + hb.beat(uid=hb_cfg.cmgr) # record cmgr heartbeats def hb_sub_cb(topic, msg): - if msg['uid'] == cfg.cmgr: - hb.beat(uid=cfg.cmgr) + if msg['uid'] == hb_cfg.cmgr: + hb.beat(uid=hb_cfg.cmgr) - ru.zmq.Subscriber('heartbeat', cfg.heartbeat.addr_sub, + ru.zmq.Subscriber('heartbeat', hb_cfg.heartbeat.addr_sub, topic='heartbeat', cb=hb_sub_cb, log=log, prof=prof) diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 671fe8a88b..6b22f2e215 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -235,9 +235,6 @@ def _initialize_primary(self, dburl): self._cmgr.start_components(self._cfg.components) self._reg_service.dump() - # expose the cmgr's heartbeat channel to anyone who wants to use it - self._cfg.heartbeat = self._cmgr.cfg.heartbeat # pylint: disable=E1101 - self._rec = False if self._cfg.record: diff --git a/src/radical/pilot/utils/component.py 
b/src/radical/pilot/utils/component.py
index 56e0317888..73f65b7678 100644
--- a/src/radical/pilot/utils/component.py
+++ b/src/radical/pilot/utils/component.py
@@ -82,7 +82,7 @@ def __init__(self, sid, reg_addr):
                                      'stall_hwm'  : 1,
                                      'bulk_size'  : 0,
                                      'path'       : self._cfg.path})
-        self._hb_bridge = ru.zmq.PubSub(bcfg)
+        self._hb_bridge = ru.zmq.PubSub(channel='heartbeat', kind='pubsub')
         self._hb_bridge.start()
 
         self._cfg.heartbeat.addr_pub = str(self._hb_bridge.addr_pub)
@@ -109,6 +109,9 @@ def __init__(self, sid, reg_addr):
         self._hb.wait_startup(self._uid, self._cfg.heartbeat.timeout)
         self._log.info('heartbeat system up')
 
+        # publish heartbeat information in registry
+        self._reg['heartbeat'] = self._cfg.heartbeat
+
 
     # --------------------------------------------------------------------------
    #
From 8d262a32200bc495b06983384fa58c5cac3963d8 Mon Sep 17 00:00:00 2001
From: Andre Merzky
Date: Mon, 24 Apr 2023 03:41:38 +0200
Subject: [PATCH 036/171] snap

---
 docs/architecture/registry.txt  |  46 ++++++++++++
 docs/architecture/zmq/server.py | 125 +++++++++++++++++++++++++++++++
 src/radical/pilot/session.py    |   3 +-
 3 files changed, 173 insertions(+), 1 deletion(-)
 create mode 100644 docs/architecture/registry.txt
 create mode 100755 docs/architecture/zmq/server.py

diff --git a/docs/architecture/registry.txt b/docs/architecture/registry.txt
new file mode 100644
index 0000000000..f1fbb48437
--- /dev/null
+++ b/docs/architecture/registry.txt
@@ -0,0 +1,46 @@
+
+registry = {
+    '<sid>' : {
+        ''
+        'cfg' : {
+            'resources' : { ... },
+        },
+        'pilot.0000': {
+            'description': { ... },
+            'rm_info'    : { ... },
+            'agent.0'    : { '<sub_registry>' }
+        }
+    }
+}
+
+SUB_REGISTRY = {
+    'cmgr.0000' : {
+        'heartbeat' : {
+            'addr_pub': ... ,
+            'addr_sub': ... ,
+            'cfg' : {
+                'interval': ... ,
+                'timeout' : ...
+            }
+        },
+        'bridges' : {
+            'state_pubsub': {
+                'addr_pub': ... ,
+                'addr_sub': ... ,
+                'cfg' : {
+                    'log_lvl'  : ...,
+                    'hwm'      : ...,
+                    'bulk_size': ...
+                }
+            }
+        },
+        'components': {
+            'staging.0001': {
+                'cfg': {
+                    'log_lvl': ...
+                }
+            }
+        }
+    }
+}
+
diff --git a/docs/architecture/zmq/server.py b/docs/architecture/zmq/server.py
new file mode 100755
index 0000000000..a2bc45b225
--- /dev/null
+++ b/docs/architecture/zmq/server.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+
+import zmq
+import msgpack
+
+import threading as mt
+
+
+# ------------------------------------------------------------------------------
+#
+class Server(object):
+
+    # --------------------------------------------------------------------------
+    #
+    def __init__(self) -> None:
+
+        self._ctx    = zmq.Context()
+        self._url    = None
+        self._thread = None
+        self._term   = mt.Event()
+
+
+    # --------------------------------------------------------------------------
+    #
+    @property
+    def url(self):
+        return self._url
+
+
+    # --------------------------------------------------------------------------
+    #
+    def listen(self, url: str = None):
+
+        if not url:
+            url = 'tcp://*:*'
+
+        if self._url:
+            raise RuntimeError('already connected at %s' % self._url)
+
+        self._sock = self._ctx.socket(zmq.SERVER)
+        self._sock.bind(url)
+
+        self._url = self._sock.getsockopt(zmq.LAST_ENDPOINT)
+
+        self._thread = mt.Thread(target=self._work)
+        self._thread.daemon = True
+        self._thread.start()
+
+
+    # --------------------------------------------------------------------------
+    #
+    def _work(self):
+
+        poller = zmq.Poller()
+        poller.register(self._sock, zmq.POLLIN)
+
+        while not self._term.is_set():
+
+            info = poller.poll()
+            if info:
+                msg = msgpack.unpackb(self._sock.recv())
+                print('< %s' % msg)
+                msg['foo'] = 1   # annotate the message before echoing it
+                self._sock.send(msgpack.packb(msg))
+                print('> %s' % msg)
+
+
+# ------------------------------------------------------------------------------
+#
+class Client(object):
+
+    # --------------------------------------------------------------------------
+    #
+    def __init__(self) -> None:
+
+        self._ctx = zmq.Context()
+        self._url = None
+
+
+    # --------------------------------------------------------------------------
+    #
+    @property
+    def url(self):
+        return self._url
+
+
+    # --------------------------------------------------------------------------
+    #
+    def connect(self, url: str = None):
+
+        if self._url:
+            raise RuntimeError('already connected at %s' % self._url)
+
+        self._sock = self._ctx.socket(zmq.CLIENT)
+        self._sock.connect(url)
+
+        self._url = self._sock.getsockopt(zmq.LAST_ENDPOINT)
+
+
+    # --------------------------------------------------------------------------
+    #
+    def work(self):
+
+        for i in range(3):
+
+            msg = {'cnt': i}
+            self._sock.send(msgpack.packb(msg))
+            print('> %s' % msg)
+
+            rep = msgpack.unpackb(self._sock.recv())
+            print('< %s' % rep)
+
+
+# ------------------------------------------------------------------------------
+#
+if __name__ == '__main__':
+
+    server = Server()
+    server.listen()
+
+    client = Client()
+    client.connect(server.url)
+    client.work()   # exchange a few messages with the echo server
+
+
diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py
index 337f9707f1..3c398c6ed8 100644
--- a/src/radical/pilot/session.py
+++ b/src/radical/pilot/session.py
@@ -38,9 +38,10 @@ class Session(rs.Session):
     # sessions of RP client or agent modules), but all components need to call
     # the session's `heartbeat()` method at regular intervals.
- # the reporter is an applicataion-level singleton + # the reporter is an application-level singleton _reporter = None + # -------------------------------------------------------------------------- # def __init__(self, dburl=None, uid=None, cfg=None, _primary=True, From eececf21bdf39bec7f950892e7ad2532fc1591a8 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 3 May 2023 23:01:07 +0200 Subject: [PATCH 037/171] snap --- bin/radical-pilot-bridge | 19 +++++++------ docs/architecture/registry.txt | 4 +-- .../pilot/configs/session_default.json | 2 +- src/radical/pilot/session.py | 13 ++++++--- src/radical/pilot/utils/component.py | 27 ++++++++++--------- 5 files changed, 36 insertions(+), 29 deletions(-) diff --git a/bin/radical-pilot-bridge b/bin/radical-pilot-bridge index b21581dd57..59f0195823 100755 --- a/bin/radical-pilot-bridge +++ b/bin/radical-pilot-bridge @@ -102,8 +102,7 @@ def wrapped_main(sid, reg_addr, uid, log, prof): term = mt.Event() reg = ru.zmq.RegistryClient(url=reg_addr, pwd=sid) - - hb_cfg = reg['hb_cfg'] + cfg = ru.TypedDict(reg['cfg']) spt.setproctitle('rp.%s' % uid) @@ -116,7 +115,7 @@ def wrapped_main(sid, reg_addr, uid, log, prof): bridge.start() # bridge runs - send heartbeats so that cmgr knows about it - hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.heartbeat.addr_pub, + hb_pub = ru.zmq.Publisher('heartbeat', cfg.heartbeat.addr_pub, log=log, prof=prof) def hb_beat_cb(): @@ -127,23 +126,23 @@ def wrapped_main(sid, reg_addr, uid, log, prof): term.set() return None - hb = ru.Heartbeat(uid=hb_cfg.uid, - timeout=hb_cfg.heartbeat.timeout, - interval=hb_cfg.heartbeat.interval, + hb = ru.Heartbeat(uid=cfg.uid, + timeout=cfg.heartbeat.timeout, + interval=cfg.heartbeat.interval, beat_cb=hb_beat_cb, term_cb=hb_term_cb, log=log) hb.start() # register cmgr heartbeat by beating once - hb.beat(uid=hb_cfg.cmgr) + hb.beat(uid=cfg.cmgr) # record cmgr heartbeats def hb_sub_cb(topic, msg): - if msg['uid'] == hb_cfg.cmgr: - hb.beat(uid=hb_cfg.cmgr) + if msg['uid'] == cfg.cmgr: + hb.beat(uid=cfg.cmgr) - ru.zmq.Subscriber('heartbeat', hb_cfg.heartbeat.addr_sub, + ru.zmq.Subscriber('heartbeat', cfg.heartbeat.addr_sub, topic='heartbeat', cb=hb_sub_cb, log=log, prof=prof) diff --git a/docs/architecture/registry.txt b/docs/architecture/registry.txt index f1fbb48437..c4eee93175 100644 --- a/docs/architecture/registry.txt +++ b/docs/architecture/registry.txt @@ -2,9 +2,7 @@ registry = { '' : { '' - 'cfg' : { - 'resources' : { ... }, - }, + 'heartbeat' : { ... }, 'pilot.0000': { 'description': { ... }, 'rm_info' : { ... }, diff --git a/src/radical/pilot/configs/session_default.json b/src/radical/pilot/configs/session_default.json index e6481bb34e..585f82b63d 100644 --- a/src/radical/pilot/configs/session_default.json +++ b/src/radical/pilot/configs/session_default.json @@ -4,7 +4,7 @@ # a functional rp session, both on the client and on the agent side. 
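 # As a usage sketch (the path is illustrative), all session output can be
 # redirected before the session is created:
 #
 #     export RADICAL_PILOT_BASE=/tmp/rp
 #
 # which the `base` setting below then picks up via environment expansion.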
{ "dburl" : "${RADICAL_PILOT_DBURL}", - "session_base" : "${RADICAL_PILOT_SESSION_BASE:$PWD}", + "base" : "${RADICAL_PILOT_BASE:$PWD}", "record" : "${RADICAL_PILOT_SESSION_RECORD}", "bulk_time" : 1.0, diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 3c398c6ed8..190829e93d 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -160,9 +160,16 @@ def __init__(self, dburl=None, uid=None, cfg=None, _primary=True, assert self._cfg.reg_addr self._reg = ru.zmq.RegistryClient(url=self._cfg.reg_addr, pwd=self._uid) - # store session and resource configs in the registry - self._reg['cfg'] = self._cfg - self._reg['cfg.resources'] = self._rcfgs + # store some session and resource config data in the registry + self._reg['cfg'] = {'base' : self._cfg.base, + 'path' : self._cfg.path, + 'dburl' : self._cfg.dburl, + 'reg_addr' : self._cfg.reg_addr, + 'client_sandbox': self._cfg.client_sandbox, + 'client_sandbox': self._cfg.client_sandbox, + 'heartbeat' : self._cfg.heartbeat, + } + self._prof = self._get_profiler(name=self._uid) self._rep = self._get_reporter(name=self._uid) diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index feb88c0ede..af533c3256 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -62,6 +62,7 @@ def __init__(self, sid, reg_addr): self._reg = ru.zmq.RegistryClient(url=reg_addr, pwd=sid) self._cfg = ru.Config(from_dict=self._reg['cfg']) + self._reg.dump('init') self._uid = ru.generate_id('cmgr.%(item_counter)04d', ru.ID_CUSTOM, ns=self._sid) self._uids = [self._uid] # uids to track hartbeats for (incl. own) @@ -82,35 +83,37 @@ def __init__(self, sid, reg_addr): 'stall_hwm' : 1, 'bulk_size' : 0, 'path' : self._cfg.path}) - self._hb_bridge = ru.zmq.PubSub(channel='heartbeat', kind='pubsub') + self._hb_bridge = ru.zmq.PubSub(channel='heartbeat') self._hb_bridge.start() - self._cfg.heartbeat.addr_pub = str(self._hb_bridge.addr_pub) - self._cfg.heartbeat.addr_sub = str(self._hb_bridge.addr_sub) + hb_cfg = ru.TypedDict(self._reg['cfg.heartbeat']) + hb_cfg.addr_pub = str(self._hb_bridge.addr_pub) + hb_cfg.addr_sub = str(self._hb_bridge.addr_sub) + + # publish heartbeat information in registry + self._cfg.heartbeat = hb_cfg + self._reg['cfg.heartbeat'] = hb_cfg # runs a HB monitor on that channel self._hb = ru.Heartbeat(uid=self.uid, - timeout=self._cfg.heartbeat.timeout, - interval=self._cfg.heartbeat.interval, + timeout=hb_cfg['timeout'], + interval=hb_cfg['interval'], beat_cb=self._hb_beat_cb, # on every heartbeat term_cb=self._hb_term_cb, # on termination log=self._log) - self._hb_pub = ru.zmq.Publisher('heartbeat', - self._cfg.heartbeat.addr_pub, + self._hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg['addr_pub'], log=self._log, prof=self._prof) - self._hb_sub = ru.zmq.Subscriber('heartbeat', - self._cfg.heartbeat.addr_sub, + self._hb_sub = ru.zmq.Subscriber('heartbeat', hb_cfg['addr_sub'], topic='heartbeat', cb=self._hb_sub_cb, log=self._log, prof=self._prof) + # confirm the bridge being usable by listening to our own heartbeat self._hb.start() - self._hb.wait_startup(self._uid, self._cfg.heartbeat.timeout) + self._hb.wait_startup(self._uid, hb_cfg['timeout']) self._log.info('heartbeat system up') - # publish heartbeat information in registry - self._reg['heartbeat'] = self._cfg.heartbeat # -------------------------------------------------------------------------- From 63d4a3448d8373dcfc3d851bd9a893665d519a79 Mon Sep 17 00:00:00 2001 From: Andre Merzky 
Date: Sun, 7 May 2023 23:09:32 +0200 Subject: [PATCH 038/171] switch to registry configs instead of config files --- bin/radical-pilot-bridge | 36 +++---- bin/radical-pilot-component | 62 ++++++------ src/radical/pilot/agent/agent_0.py | 16 +-- src/radical/pilot/agent/agent_n.py | 12 ++- src/radical/pilot/agent/executing/base.py | 36 +++---- src/radical/pilot/agent/executing/popen.py | 3 +- .../pilot/agent/resource_manager/base.py | 29 +++--- src/radical/pilot/agent/scheduler/base.py | 18 +++- src/radical/pilot/configs/agent_default.json | 7 +- .../pilot/configs/agent_default_sa.json | 7 +- .../pilot/configs/session_default.json | 9 +- src/radical/pilot/pilot_manager.py | 6 +- src/radical/pilot/raptor/master.py | 15 +-- src/radical/pilot/session.py | 7 +- src/radical/pilot/task_manager.py | 18 ++-- src/radical/pilot/utils/component.py | 97 +++++++++---------- src/radical/pilot/worker/stager.py | 2 +- src/radical/pilot/worker/update.py | 14 +-- 18 files changed, 206 insertions(+), 188 deletions(-) diff --git a/bin/radical-pilot-bridge b/bin/radical-pilot-bridge index 59f0195823..b51959c297 100755 --- a/bin/radical-pilot-bridge +++ b/bin/radical-pilot-bridge @@ -4,6 +4,7 @@ __copyright__ = "Copyright 2014-2019, http://radical.rutgers.edu" __license__ = "MIT" +import os import sys import time @@ -27,10 +28,6 @@ def main(sid, reg_addr, uid): If the config contains a `heartbeat` section, that section must be formatted as follows: - RCT comm bridges can be monitored via heartbeats (using a bridge-less pubsub - channel). To enable that monitoring, the config should contains - a `heartbeat` section, that section must be formatted as follows: - { 'from' : 'uid', 'pub' : 'addr_pub', @@ -58,13 +55,11 @@ def main(sid, reg_addr, uid): pubsub and queue type bridges): { - 'uid': '$bridge.uid', 'pub': '$addr_pub', 'sub': '$addr_sub' } { - 'uid': '$bridge.uid', 'put': '$addr_put', 'get': '$addr_get' } @@ -83,8 +78,8 @@ def main(sid, reg_addr, uid): ''' # basic setup: logger and profiler - log = ru.Logger(name=uid, ns='radical.pilot') - prof = ru.Profiler(name=uid, ns='radical.pilot') + log = ru.Logger(name=uid, ns='radical.pilot', path=os.getcwd()) + prof = ru.Profiler(name=uid, ns='radical.pilot', path=os.getcwd()) try: prof.prof('bridge_start', uid=uid) @@ -102,20 +97,21 @@ def wrapped_main(sid, reg_addr, uid, log, prof): term = mt.Event() reg = ru.zmq.RegistryClient(url=reg_addr, pwd=sid) - cfg = ru.TypedDict(reg['cfg']) + scfg = ru.TypedDict(reg['cfg']) + bcfg = ru.TypedDict(reg['bridges.%s.cfg' % uid]) spt.setproctitle('rp.%s' % uid) # create the bridge, store connection addresses in FS, and begin to work - bridge = ru.zmq.Bridge.create(uid) + bridge = ru.zmq.Bridge.create(uid, cfg=bcfg) - reg['bridges.%s' % uid] = {bridge.type_in : str(bridge.addr_in), - bridge.type_out: str(bridge.addr_out)} + reg['bridges.%s.%s' % (uid, bridge.type_in )] = str(bridge.addr_in) + reg['bridges.%s.%s' % (uid, bridge.type_out)] = str(bridge.addr_out) bridge.start() # bridge runs - send heartbeats so that cmgr knows about it - hb_pub = ru.zmq.Publisher('heartbeat', cfg.heartbeat.addr_pub, + hb_pub = ru.zmq.Publisher('heartbeat', scfg.heartbeat.addr_pub, log=log, prof=prof) def hb_beat_cb(): @@ -126,23 +122,23 @@ def wrapped_main(sid, reg_addr, uid, log, prof): term.set() return None - hb = ru.Heartbeat(uid=cfg.uid, - timeout=cfg.heartbeat.timeout, - interval=cfg.heartbeat.interval, + hb = ru.Heartbeat(uid=bcfg.uid, + timeout=scfg.heartbeat.timeout, + interval=scfg.heartbeat.interval, beat_cb=hb_beat_cb, 
term_cb=hb_term_cb, log=log) hb.start() # register cmgr heartbeat by beating once - hb.beat(uid=cfg.cmgr) + hb.beat(uid=bcfg.cmgr) # record cmgr heartbeats def hb_sub_cb(topic, msg): - if msg['uid'] == cfg.cmgr: - hb.beat(uid=cfg.cmgr) + if msg['uid'] == bcfg.cmgr: + hb.beat(uid=bcfg.cmgr) - ru.zmq.Subscriber('heartbeat', cfg.heartbeat.addr_sub, + ru.zmq.Subscriber('heartbeat', scfg.heartbeat.addr_sub, topic='heartbeat', cb=hb_sub_cb, log=log, prof=prof) diff --git a/bin/radical-pilot-component b/bin/radical-pilot-component index df9e35d91d..e66df6592c 100755 --- a/bin/radical-pilot-component +++ b/bin/radical-pilot-component @@ -4,6 +4,7 @@ __copyright__ = "Copyright 2014-2019, http://radical.rutgers.edu" __license__ = "MIT" +import os import sys import time @@ -18,7 +19,7 @@ dh = ru.DebugHelper() # ------------------------------------------------------------------------------ # -def main(cfg): +def main(sid, reg_addr, uid): ''' This thin wrapper starts a RCT component It expects a single argument: a config to use for the component's configuration. The config must contain: @@ -53,60 +54,67 @@ def main(cfg): and are interpreted by the component implementation. ''' - # basic setup: cfg, logger and profiler - log = ru.Logger(name=cfg.uid, ns='radical.pilot', path=cfg.path) - prof = ru.Profiler(name=cfg.uid, ns='radical.pilot', path=cfg.path) + # basic setup: logger and profiler + log = ru.Logger(name=uid, ns='radical.pilot', path=os.getcwd()) + prof = ru.Profiler(name=uid, ns='radical.pilot', path=os.getcwd()) try: - prof.prof('comp_start', uid=cfg.uid) + prof.prof('comp_start', uid=uid) prof.disable() - wrapped_main(cfg, log, prof) + wrapped_main(sid, reg_addr, uid, log, prof) + finally: prof.enable() - prof.prof('comp_stop', uid=cfg.uid) + prof.prof('comp_stop', uid=uid) -def wrapped_main(cfg, log, prof): +# ------------------------------------------------------------------------------ +# +def wrapped_main(sid, reg_addr, uid, log, prof): term = mt.Event() + reg = ru.zmq.RegistryClient(url=reg_addr, pwd=sid) + scfg = ru.TypedDict(reg['cfg']) + ccfg = ru.TypedDict(reg['components.%s.cfg' % uid]) - spt.setproctitle('rp.%s' % cfg.uid) + spt.setproctitle('rp.%s' % uid) # start a non-primary session - session = rp.Session(cfg=cfg, _primary=False) + session = rp.Session(cfg=scfg, _primary=False) # create the component and begin to work - comp = rp.utils.Component.create(cfg, session) + comp = rp.utils.Component.create(ccfg, session) comp.start() # component runs - send heartbeats so that cmgr knows about it - hb_pub = ru.zmq.Publisher('heartbeat', cfg.heartbeat.addr_pub) #, log=log) + hb_pub = ru.zmq.Publisher('heartbeat', scfg.heartbeat.addr_pub, + log=log, prof=prof) def hb_beat_cb(): - hb_pub.put('heartbeat', msg={'uid': cfg.uid}) + hb_pub.put('heartbeat', msg={'uid': uid}) def hb_term_cb(hb_uid): comp.stop() term.set() return None - hb = ru.Heartbeat(uid=cfg.uid, - timeout=cfg.heartbeat.timeout, - interval=cfg.heartbeat.interval, + hb = ru.Heartbeat(uid=ccfg.uid, + timeout=scfg.heartbeat.timeout, + interval=scfg.heartbeat.interval, beat_cb=hb_beat_cb, term_cb=hb_term_cb, log=log) hb.start() # register cmgr heartbeat by beating once - hb.beat(uid=cfg.cmgr) + hb.beat(uid=ccfg.cmgr) # record cmgr heartbeats def hb_sub_cb(topic, msg): - if msg['uid'] == cfg.cmgr: - hb.beat(uid=cfg.cmgr) + if msg['uid'] == ccfg.cmgr: + hb.beat(uid=ccfg.cmgr) - ru.zmq.Subscriber('heartbeat', cfg.heartbeat.addr_sub, + ru.zmq.Subscriber('heartbeat', scfg.heartbeat.addr_sub, topic='heartbeat', cb=hb_sub_cb, log=log, 
prof=prof)
 
@@ -119,17 +127,17 @@ def wrapped_main(cfg, log, prof):
 #
 if __name__ == "__main__":
 
-    if len(sys.argv) != 2:
+    if len(sys.argv) != 4:
         sys.stderr.write('error: invalid arguments\n'
-                         'usage: %s <cfg_file>\n' % sys.argv[0])
+                         'usage: %s <sid> <reg_addr> <uid>\n' % sys.argv[0])
         raise RuntimeError('invalid arguments: %s' % sys.argv)
 
-    fname = sys.argv[1]
-    cfg   = ru.Config(path=fname)
-    path  = '%s/%s' % (cfg.path, cfg.uid)
+    sid      = sys.argv[1]
+    reg_addr = sys.argv[2]
+    uid      = sys.argv[3]
 
-    ru.daemonize(main=main, args=[cfg], stdout='%s.out' % path,
-                 stderr='%s.err' % path)
+    ru.daemonize(main=main, args=[sid, reg_addr, uid],
+                 stdout='%s.out' % uid, stderr='%s.err' % uid)
 
     sys.exit(0)
 
diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py
index d4d46a3be1..e2ef3dcc84 100644
--- a/src/radical/pilot/agent/agent_0.py
+++ b/src/radical/pilot/agent/agent_0.py
@@ -66,6 +66,9 @@ def __init__(self, cfg: ru.Config, session: Session):
         self._reg_service.start()
         self._reg_addr = self._reg_service.addr
 
+        self._reg = ru.zmq.RegistryClient(url=self._reg_addr, pwd=self._sid)
+        self._reg['cfg'] = self._cfg
+
         # let all components know where to look for the registry
         self._cfg['reg_addr'] = self._reg_addr
 
@@ -81,11 +84,14 @@ def __init__(self, cfg: ru.Config, session: Session):
 
         # expose heartbeat channel to sub-agents, bridges and components,
         # and start those
-        self._cmgr = rpu.ComponentManager(self._reg_addr)
-        self._cfg.heartbeat = self._cmgr.cfg.heartbeat
+        self._cmgr = rpu.ComponentManager(self._sid, self._reg_addr, self._uid)
+        self._cfg.heartbeat = self._reg['cfg']['heartbeat']
+
+        ccfg = {'pid': self._pid,
+                'log_lvl': 'debug'}  # FIXME
 
-        self._cmgr.start_bridges()
-        self._cmgr.start_components()
+        self._cmgr.start_bridges(self._cfg.bridges)
+        self._cmgr.start_components(self._cfg.components, ccfg)
 
         # service tasks uids, which were launched
         self._service_uids_launched = list()
@@ -348,8 +354,6 @@ def _write_sa_configs(self):
             tmp_cfg['aid']   = sa
             tmp_cfg['owner'] = 'agent.0'
 
-            ru.write_json(tmp_cfg, './%s.cfg' % sa)
-
 
     # --------------------------------------------------------------------------
     #
diff --git a/src/radical/pilot/agent/agent_n.py b/src/radical/pilot/agent/agent_n.py
index 0b91533d9b..d46580f444 100644
--- a/src/radical/pilot/agent/agent_n.py
+++ b/src/radical/pilot/agent/agent_n.py
@@ -21,12 +21,14 @@ class Agent_n(rpu.Worker):
     def __init__(self, cfg, session):
 
         self._cfg      = cfg
+        self._sid      = cfg.sid
         self._pid      = cfg.pid
         self._pmgr     = cfg.pmgr
         self._pwd      = cfg.pilot_sandbox
         self._sid      = cfg.sid
         self._reg_addr = cfg.reg_addr
 
+
         # log / profile via session until component manager is initialized
         self._session = session
         self._log     = session._log
@@ -35,17 +37,19 @@ def __init__(self, cfg, session):
         self._starttime   = time.time()
         self._final_cause = None
 
+        self._reg = ru.zmq.RegistryClient(url=self._reg_addr, pwd=self._sid)
+
         # this is the earliest point to sync bootstrap and agent profiles
         self._prof.prof('hostname', uid=self._pid, msg=ru.get_hostname())
         self._prof.prof('sub_agent_start', uid=self._pid)
 
         # expose heartbeat channel to sub-agents, bridges and components,
         # and start those
-        self._cmgr = rpu.ComponentManager(self._reg_addr)
-        self._cfg.heartbeat = self._cmgr.cfg.heartbeat
+        self._cmgr = rpu.ComponentManager(self._sid, self._reg_addr, self._uid)
+        self._cfg.heartbeat = self._reg['cfg']['heartbeat']
 
-        self._cmgr.start_bridges()
-        self._cmgr.start_components()
+        self._cmgr.start_bridges(self._cfg.bridges)
+        self._cmgr.start_components(self._cfg.components)
 
         # at this point the session is up
and connected, and it should have # brought up all communication bridges and components. We are diff --git a/src/radical/pilot/agent/executing/base.py b/src/radical/pilot/agent/executing/base.py index 24e1da8730..e93fbbe607 100644 --- a/src/radical/pilot/agent/executing/base.py +++ b/src/radical/pilot/agent/executing/base.py @@ -50,16 +50,16 @@ def create(cls, cfg, session): if cls != AgentExecutingComponent: raise TypeError('Factory only available to base class!') - name = cfg['spawner'] + name = session.cfg.resource_cfg.agent_spawner from .popen import Popen from .flux import Flux from .sleep import Sleep impl = { - EXECUTING_NAME_POPEN : Popen, - EXECUTING_NAME_FLUX : Flux, - EXECUTING_NAME_SLEEP : Sleep, + EXECUTING_NAME_POPEN: Popen, + EXECUTING_NAME_FLUX : Flux, + EXECUTING_NAME_SLEEP: Sleep, } if name not in impl: @@ -72,26 +72,28 @@ def create(cls, cfg, session): # def initialize(self): - # self._log.debug('exec base initialize') + session_cfg = ru.Config(cfg=self._reg['cfg']) + resource_cfg = ru.Config(cfg=session_cfg['resource_cfg']) - # The spawner/executor needs the ResourceManager information which have - # been collected during agent startup. - self._rm = rpa.ResourceManager.create(self._cfg.resource_manager, - self._cfg, self._log, self._prof) + # the resource manager needs to connect to the registry + resource_cfg.reg_addr = self._cfg.reg_addr + + rm_name = resource_cfg['resource_manager'] + self._rm = rpa.ResourceManager.create(rm_name, resource_cfg, + self._log, self._prof) self._pwd = os.path.realpath(os.getcwd()) self.sid = self._cfg['sid'] - self.resource = self._cfg['resource'] - self.rsbox = self._cfg['resource_sandbox'] - self.ssbox = self._cfg['session_sandbox'] - self.psbox = self._cfg['pilot_sandbox'] + self.resource = session_cfg['resource'] + self.rsbox = session_cfg['resource_sandbox'] + self.ssbox = session_cfg['session_sandbox'] + self.psbox = session_cfg['pilot_sandbox'] self.gtod = '$RP_PILOT_SANDBOX/gtod' self.prof = '$RP_PILOT_SANDBOX/prof' - # if so configured, let the Task know what to use as tmp dir - self._task_tmp = self._cfg.get('task_tmp', - os.environ.get('TMP', '/tmp')) - + # if so configured, let the tasks know what to use as tmp dir + self._task_tmp = resource_cfg.get('task_tmp', + os.environ.get('TMP', '/tmp')) if self.psbox.startswith(self.ssbox): self.psbox = '$RP_SESSION_SANDBOX%s' % self.psbox[len(self.ssbox):] diff --git a/src/radical/pilot/agent/executing/popen.py b/src/radical/pilot/agent/executing/popen.py index 50e45ac0eb..c4d2e5bf82 100644 --- a/src/radical/pilot/agent/executing/popen.py +++ b/src/radical/pilot/agent/executing/popen.py @@ -345,9 +345,10 @@ def _handle_task(self, task): self._log.info('Launching task %s via %s in %s', tid, cmdline, sbox) _launch_out_h = ru.ru_open('%s/%s.launch.out' % (sbox, tid), 'w') + # `start_new_session=True` is default, which enables decoupling # from the parent process group (part of the task cancellation) - _start_new_session = self._cfg['resource_cfg'].\ + _start_new_session = self._session.cfg['resource_cfg'].\ get('new_session_per_task', True) self._prof.prof('task_run_start', uid=tid) diff --git a/src/radical/pilot/agent/resource_manager/base.py b/src/radical/pilot/agent/resource_manager/base.py index 3ed6bcee5d..07cd66d869 100644 --- a/src/radical/pilot/agent/resource_manager/base.py +++ b/src/radical/pilot/agent/resource_manager/base.py @@ -61,7 +61,7 @@ class RMInfo(ru.TypedDict): 'mem_per_node' : int, # memory per node (MB) 'details' : {None: None}, # dict of launch method info - 
'lm_info' : {str: None}, # dict of launch method info + 'launch_methods' : {str: None}, # dict of launch method info } _defaults = { @@ -151,12 +151,12 @@ def __init__(self, cfg, log, prof): # have a valid info - store in registry and complete initialization reg.put('rm.%s' % self.name.lower(), rm_info.as_dict()) - # set up launch methods even when initialized from registry info - self._prepare_launch_methods(rm_info) - reg.close() self._set_info(rm_info) + # set up launch methods even when initialized from registry info + self._prepare_launch_methods(rm_info) + # -------------------------------------------------------------------------- # @@ -219,11 +219,8 @@ def init_from_scratch(self): rm_info.threads_per_gpu = 1 rm_info.mem_per_gpu = None - - rcfg = self._cfg.resource_cfg - rm_info.mem_per_node = rcfg.mem_per_node or 0 - - system_architecture = rcfg.get('system_architecture', {}) + rm_info.mem_per_node = self._cfg.mem_per_node or 0 + system_architecture = self._cfg.get('system_architecture', {}) rm_info.threads_per_core = int(os.environ.get('RADICAL_SMT') or system_architecture.get('smt', 1)) @@ -311,6 +308,9 @@ def init_from_scratch(self): if not rm_info.node_list: raise RuntimeError('ResourceManager has no nodes left to run tasks') + # add launch method information to rm_info + rm_info.launch_methods = self._cfg.resource_cfg.launch_methods + return rm_info @@ -318,20 +318,19 @@ def init_from_scratch(self): # def _prepare_launch_methods(self, rm_info): - launch_methods = self._cfg.resource_cfg.launch_methods - + launch_methods = self._rm_info.launch_methods self._launchers = {} self._launch_order = launch_methods.get('order') or list(launch_methods) for lm_name in list(self._launch_order): - lm_cfg = launch_methods[lm_name] + lm_cfg = ru.Config(launch_methods[lm_name]) try: self._log.debug('prepare lm %s', lm_name) - lm_cfg['pid'] = self._cfg.pid - lm_cfg['reg_addr'] = self._cfg.reg_addr - lm_cfg['resource'] = self._cfg.resource + lm_cfg.pid = self._cfg.pid + lm_cfg.reg_addr = self._cfg.reg_addr + lm_cfg.resource = self._cfg.resource self._launchers[lm_name] = rpa.LaunchMethod.create( lm_name, lm_cfg, rm_info, self._log, self._prof) diff --git a/src/radical/pilot/agent/scheduler/base.py b/src/radical/pilot/agent/scheduler/base.py index 18a74dcbfd..0245943e39 100644 --- a/src/radical/pilot/agent/scheduler/base.py +++ b/src/radical/pilot/agent/scheduler/base.py @@ -216,8 +216,15 @@ def initialize(self): # The scheduler needs the ResourceManager information which have been # collected during agent startup. - self._rm = ResourceManager.create(self._cfg.resource_manager, - self._cfg, self._log, self._prof) + session_cfg = ru.Config(cfg=self._reg['cfg']) + resource_cfg = ru.Config(cfg=session_cfg['resource_cfg']) + + # the resource manager needs to connect to the registry + resource_cfg.reg_addr = self._cfg.reg_addr + + rm_name = resource_cfg['resource_manager'] + self._rm = ResourceManager.create(rm_name, resource_cfg, + self._log, self._prof) self._partitions = self._rm.get_partitions() # {plabel : [node_ids]} @@ -286,7 +293,7 @@ def create(cls, cfg, session): if cls != AgentSchedulingComponent: raise TypeError("Scheduler Factory only available to base class!") - name = cfg['scheduler'] + name = session.cfg.resource_cfg.agent_scheduler from .continuous_ordered import ContinuousOrdered from .continuous_colo import ContinuousColo @@ -593,6 +600,11 @@ def _schedule_tasks(self): tasks. ''' + # ZMQ endpoints will not have survived the fork. 
Specifically the
+ # registry client of the component base class will have to reconnect.
+ # FIXME: should be moved into a post-fork hook of the base class
+ self._reg = ru.zmq.RegistryClient(url=self._cfg.reg_addr, pwd=self._sid)
+
 # FIXME: the component does not clean out subscribers after fork :-/
 self._subscribers = dict()

diff --git a/src/radical/pilot/configs/agent_default.json b/src/radical/pilot/configs/agent_default.json
index af607c02fe..f0e1f0221c 100644
--- a/src/radical/pilot/configs/agent_default.json
+++ b/src/radical/pilot/configs/agent_default.json
@@ -20,9 +20,6 @@
 "target" : "local",
 "mode" : "shared",
- "bulk_time" : 1.0,
- "bulk_size" : 1024,
-
 "heartbeat" : {
 "interval" : 1.0,
 "timeout" : 60.0
@@ -73,7 +70,9 @@
 "components" : {
 # the update worker must live in agent.0, since only that agent is
 # sure to have connectivity toward the DB.
- "update" : {"count" : 1},
+ "update" : {"count" : 1,
+ "db_bulk_time" : 1.0,
+ "db_bulk_size" : 1024 },
 "agent_staging_input" : {"count" : 1},
 "agent_scheduling" : {"count" : 1},
 "agent_executing" : {"count" : 1},

diff --git a/src/radical/pilot/configs/agent_default_sa.json b/src/radical/pilot/configs/agent_default_sa.json
index b851f919af..429481b15c 100644
--- a/src/radical/pilot/configs/agent_default_sa.json
+++ b/src/radical/pilot/configs/agent_default_sa.json
@@ -20,9 +20,6 @@
 "target" : "local",
 "mode" : "shared",
- "bulk_time" : 1.0,
- "bulk_size" : 1024,
-
 "heartbeat" : {
 "interval" : 1.0,
 "timeout" : 60.0
@@ -73,7 +70,9 @@
 "components" : {
 # the update worker must live in agent.0, since only that agent is
 # sure to have connectivity toward the DB.
- "update" : {"count" : 1},
+ "update" : {"count" : 1,
+ "db_bulk_time" : 1.0,
+ "db_bulk_size" : 1024 },
 "agent_staging_input" : {"count" : 1},
 "agent_scheduling" : {"count" : 1},
 # "agent_executing" : {"count" : 1},

diff --git a/src/radical/pilot/configs/session_default.json b/src/radical/pilot/configs/session_default.json
index 585f82b63d..f8fc2530a1 100644
--- a/src/radical/pilot/configs/session_default.json
+++ b/src/radical/pilot/configs/session_default.json
@@ -7,9 +7,6 @@
 "base" : "${RADICAL_PILOT_BASE:$PWD}",
 "record" : "${RADICAL_PILOT_SESSION_RECORD}",
- "bulk_time" : 1.0,
- "bulk_size" : 1024,
-
 "heartbeat" : {
 "interval" : 1.0,
 "timeout" : 60.0
@@ -41,8 +38,10 @@
 "components" : {
 # how many instances of the respective components should be started
- "update" : { "count" : 1 },
- "stager" : { "count" : 1 }
+ "update" : { "count" : 1,
+ "db_bulk_time": 1.0,
+ "db_bulk_size": 1024 },
+ "stager" : { "count" : 1 }
 }
 }

diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py
index a6bb17077c..823cfaefda 100644
--- a/src/radical/pilot/pilot_manager.py
+++ b/src/radical/pilot/pilot_manager.py
@@ -136,9 +136,9 @@ def __init__(self, session, uid=None, cfg='default'):
 self._rep.info('<
Date: Mon, 8 May 2023 09:00:09 +0200
Subject: [PATCH 039/171] fix some tests

---
 src/radical/pilot/agent/executing/popen.py    |  2 +-
 .../pilot/agent/resource_manager/base.py      |  6 ++++-
 tests/component_tests/test_component.py       | 18 +++++++++------
 tests/unit_tests/test_executing/test_base.py  | 18 ++++++++++-----
 tests/unit_tests/test_executing/test_popen.py |  2 ++
 tests/unit_tests/test_rm/test_base.py         | 22 +++++++++----------
 tests/unit_tests/test_scheduler/test_base.py  |  4 +++-
 7 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/src/radical/pilot/agent/executing/popen.py b/src/radical/pilot/agent/executing/popen.py
index c4d2e5bf82..9fe1271a42 100644
--- a/src/radical/pilot/agent/executing/popen.py
+++ b/src/radical/pilot/agent/executing/popen.py
@@ -349,7 +349,7 @@ def _handle_task(self, task):
 # `start_new_session=True` is default, which enables decoupling
 # from the parent process group (part of the task cancellation)
 _start_new_session = self._session.cfg['resource_cfg'].\
- get('new_session_per_task', True)
+ get('new_session_per_task', False)
 self._prof.prof('task_run_start', uid=tid)
 task['proc'] = sp.Popen(args = cmdline,

diff --git a/src/radical/pilot/agent/resource_manager/base.py b/src/radical/pilot/agent/resource_manager/base.py
index 07cd66d869..08a3581911 100644
--- a/src/radical/pilot/agent/resource_manager/base.py
+++ b/src/radical/pilot/agent/resource_manager/base.py
@@ -309,6 +309,7 @@ def init_from_scratch(self):
 raise RuntimeError('ResourceManager has no nodes left to run tasks')
 # add launch method information to rm_info
+ print('1 ============', self._cfg.resource_cfg.launch_methods)
 rm_info.launch_methods = self._cfg.resource_cfg.launch_methods
 return rm_info
@@ -318,6 +319,8 @@ def init_from_scratch(self):
 #
 def _prepare_launch_methods(self, rm_info):
+ import pprint
+ print('2 ============', pprint.pformat(rm_info.as_dict()))
 launch_methods = self._rm_info.launch_methods
 self._launchers = {}
 self._launch_order = launch_methods.get('order') or list(launch_methods)
@@ -334,7 +337,8 @@ def _prepare_launch_methods(self, rm_info):
 self._launchers[lm_name] = rpa.LaunchMethod.create(
 lm_name, lm_cfg, rm_info, self._log, self._prof)
- except:
+ except Exception as e:
+ print(repr(e))
 self._log.exception('skip lm %s', lm_name)
 self._launch_order.remove(lm_name)

diff --git a/tests/component_tests/test_component.py b/tests/component_tests/test_component.py
index ec0c57b22c..89abddc8dd 100755
--- a/tests/component_tests/test_component.py
+++ b/tests/component_tests/test_component.py
@@ -51,15 +51,19 @@ def test_cm_start_components(self, mocked_sh_callout, mocked_init):
 }
 }
- cm = ComponentManager(None)
- cm._uids = []
- cm._uid = 'cm.0000'
- cm._sid = 'session.0000'
- cm._cfg = ru.Config(cfg=cfg)
- cm._log = cm._prof = cm._hb = mock.Mock()
+ cm = ComponentManager('sid', 'reg_addr', 'owner')
+ cm._uids = []
+ cm._uid = 'cm.0000'
+ cm._sid = 'session.0000'
+ cm._owner = 'cm.0000'
+ cm._cfg = ru.Config(cfg=cfg)
+ cm._log = cm._prof = cm._hb = mock.Mock()
 cm._hb.wait_startup = mock.Mock(return_value=0)
- cm.start_components()
+ cm._reg = ru.Config()
+ cm._reg_addr = None
+
+ cm.start_components(ru.Config(cfg=cfg['components']))
 for cname, ccfg in cfg['components'].items():
 for fname in glob.glob('%s/%s*.json' % (cfg['path'], cname)):

diff --git a/tests/unit_tests/test_executing/test_base.py b/tests/unit_tests/test_executing/test_base.py
index 8e4e3cdfb2..380aa3dd34 100755
--- a/tests/unit_tests/test_executing/test_base.py
+++ b/tests/unit_tests/test_executing/test_base.py
@@ -38,14 +38,14 @@ def work(self, tasks):
 # method `create` is allowed to be called by the base class only
 NewExecuting.create(cfg=None, session=None)
- spawners = [
- {'spawner': 'POPEN'},
- {'spawner': 'UNKNOWN'}
- ]
+ spawners = ['POPEN', 'UNKNOWN']
+
 for spawner in spawners:
+ session = ru.Config(cfg={
+ 'cfg': { 'resource_cfg': { 'agent_spawner' : spawner}}})
 try:
- AgentExecutingComponent.create(cfg=spawner, session=None)
+ AgentExecutingComponent.create(cfg=spawner, session=session)
 except:
 # in case the spawner is not present in `rpa.executing.base`
 with self.assertRaises(ValueError):
@@ -64,7 +64,6 @@ def test_initialize(self, mocked_rm, mocked_init):
ec = AgentExecutingComponent(cfg=None, session=None) ec._cfg = ru.TypedDict(from_dict={ 'sid' : 'sid.0000', - 'resource_manager': 'FORK', 'resource_sandbox': '', 'session_sandbox' : '', 'pilot_sandbox' : '', @@ -72,6 +71,13 @@ def test_initialize(self, mocked_rm, mocked_init): 'resource_cfg' : {'order': [], 'launch_methods': {'SRUN': {}}} }) + ec._reg = ru.Config(cfg={ + 'cfg': {'resource' : 'localhost', + 'pilot_sandbox' : '', + 'session_sandbox' : '', + 'resource_sandbox': '', + 'resource_cfg' : {'resource_manager': 'FORK', + 'agent_spawner' : 'POPEN'}}}) ec._log = ec._prof = mock.Mock() ec.work = ec.control_cb = mock.Mock() ec.register_input = ec.register_output = mock.Mock() diff --git a/tests/unit_tests/test_executing/test_popen.py b/tests/unit_tests/test_executing/test_popen.py index 5b69171eca..997d12144d 100755 --- a/tests/unit_tests/test_executing/test_popen.py +++ b/tests/unit_tests/test_executing/test_popen.py @@ -91,6 +91,7 @@ def test_handle_task(self, mocked_sp_popen, mocked_lm_init, pex.psbox = '' pex.gtod = '' pex.prof = '' + pex._session = ru.Config(cfg={'cfg': {'resource_cfg': {}}}) pex._rm = mock.Mock() pex._rm.find_launcher = mocked_find_launcher @@ -98,6 +99,7 @@ def test_handle_task(self, mocked_sp_popen, mocked_lm_init, pex._handle_task(task) popen_input_kwargs = mocked_sp_popen.call_args_list[0][1] + print(popen_input_kwargs) self.assertFalse(popen_input_kwargs['start_new_session']) for prefix in ['.launch.sh', '.exec.sh']: diff --git a/tests/unit_tests/test_rm/test_base.py b/tests/unit_tests/test_rm/test_base.py index 8f9a4499da..5da1e1408c 100755 --- a/tests/unit_tests/test_rm/test_base.py +++ b/tests/unit_tests/test_rm/test_base.py @@ -208,12 +208,10 @@ def test_prepare_launch_methods(self, mocked_lm, mocked_init): mocked_lm.create.return_value = mocked_lm rm = ResourceManager(cfg=None, log=None, prof=None) - rm._log = rm._prof = mock.Mock() - rm._cfg = ru.TypedDict({'pid' : None, - 'reg_addr': None, - 'resource_cfg': { - 'launch_methods': {'SRUN': {}} - }}) + rm._log = rm._prof = mock.Mock() + rm._cfg = ru.TypedDict({'pid' : None, + 'reg_addr': None}) + rm._rm_info = ru.TypedDict({'launch_methods': {'SRUN': {}}}) # launching order not provided @@ -223,15 +221,15 @@ def test_prepare_launch_methods(self, mocked_lm, mocked_init): # launching order provided - rm._cfg.resource_cfg.launch_methods = {'order': ['SSH'], - 'SRUN' : {}, - 'SSH' : {}} + rm._rm_info.launch_methods = {'order': ['SSH'], + 'SRUN' : {}, + 'SSH' : {}} rm._prepare_launch_methods(None) self.assertEqual(rm._launch_order, ['SSH']) # launching methods not provided - rm._cfg.resource_cfg.launch_methods = {} + rm._rm_info.launch_methods = {} with self.assertRaises(RuntimeError): rm._prepare_launch_methods(None) @@ -240,7 +238,7 @@ def test_prepare_launch_methods(self, mocked_lm, mocked_init): def lm_raise_exception(*args, **kwargs): raise Exception('LM Error') - rm._cfg.resource_cfg.launch_methods = {'SRUN': {}, 'SSH': {}} + rm._rm_info.launch_methods = {'SRUN': {}, 'SSH': {}} mocked_lm.create = mock.MagicMock(side_effect=lm_raise_exception) # all LMs will be skipped, thus RuntimeError raised with self.assertRaises(RuntimeError): @@ -259,7 +257,7 @@ def lm_raise_exception_once(*args, **kwargs): raise Exception('LM Error') return mocked_lm - rm._cfg.resource_cfg.launch_methods = {'SRUN': {}, 'SSH': {}} + rm._rm_info.launch_methods = {'SRUN': {}, 'SSH': {}} mocked_lm.create = mock.MagicMock(side_effect=lm_raise_exception_once) rm._prepare_launch_methods(None) # only second LM is considered successful diff 
--git a/tests/unit_tests/test_scheduler/test_base.py b/tests/unit_tests/test_scheduler/test_base.py index 9e664d6cab..64adbd8e97 100755 --- a/tests/unit_tests/test_scheduler/test_base.py +++ b/tests/unit_tests/test_scheduler/test_base.py @@ -63,7 +63,9 @@ def _mock_get(_c, name): from functools import partial mock_get = partial(_mock_get, c) - sched._cfg = ru.Config(from_dict=c['config']) + sched._cfg = ru.Config(from_dict={'reg_addr': 'addr'}) + sched._reg = ru.Config(from_dict=c['config']) + with mock.patch.object(ru.zmq.RegistryClient, 'get', mock_get): if 'RuntimeError' in c['result']: with pytest.raises(RuntimeError): From aed58ce03dd61dcbd7b750717dc06c62e812d6e7 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 8 May 2023 09:17:38 +0200 Subject: [PATCH 040/171] linting --- src/radical/pilot/raptor/master.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/radical/pilot/raptor/master.py b/src/radical/pilot/raptor/master.py index 994798c2a3..88db7a3150 100644 --- a/src/radical/pilot/raptor/master.py +++ b/src/radical/pilot/raptor/master.py @@ -76,7 +76,7 @@ def __init__(self, cfg=None): 'bulk_size' : 1}) # FIXME: how to pass cfg? - self._input_queue = ru.zmq.Queue(qname) + self._input_queue = ru.zmq.Queue(qname, cfg=input_cfg) self._input_queue.start() # send completed request tasks to agent output staging / tmgr From 798b068b479345f55ca381373168f35ec22a8e64 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 8 May 2023 12:47:17 +0200 Subject: [PATCH 041/171] recover missing state transition --- src/radical/pilot/pmgr/launching/saga.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/radical/pilot/pmgr/launching/saga.py b/src/radical/pilot/pmgr/launching/saga.py index 039dc86c8f..67b8553432 100644 --- a/src/radical/pilot/pmgr/launching/saga.py +++ b/src/radical/pilot/pmgr/launching/saga.py @@ -67,6 +67,8 @@ def _translate_state(self, saga_state): # def _job_state_cb(self, job, _, saga_state, pid): + self._log.debug('=== job state: %s %s %s', pid, saga_state, job.id) + try: with self._lock: @@ -206,6 +208,11 @@ def kill_pilots(self, pids): self._log.debug('cancellation start') tc.cancel() tc.wait() + + for pid in pids: + pilot = self._pilots[pid] + self._state_cb(pilot, rps.CANCELED) + self._log.debug('cancellation done') From c180e19711a720d45fec853b97047fd40df96117 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 8 May 2023 12:47:45 +0200 Subject: [PATCH 042/171] remove debug marker --- src/radical/pilot/pmgr/launching/saga.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/radical/pilot/pmgr/launching/saga.py b/src/radical/pilot/pmgr/launching/saga.py index 67b8553432..9faf852639 100644 --- a/src/radical/pilot/pmgr/launching/saga.py +++ b/src/radical/pilot/pmgr/launching/saga.py @@ -67,7 +67,7 @@ def _translate_state(self, saga_state): # def _job_state_cb(self, job, _, saga_state, pid): - self._log.debug('=== job state: %s %s %s', pid, saga_state, job.id) + self._log.debug('job state: %s %s %s', pid, saga_state, job.id) try: with self._lock: From b49c342609785dee5f12d7a7480ca6799b4e3fb1 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 8 May 2023 13:02:22 +0200 Subject: [PATCH 043/171] compensate component workdir change --- src/radical/pilot/tmgr/staging_input/default.py | 2 +- src/radical/pilot/tmgr/staging_output/default.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py index 
9fcaccfa41..f0707365b1 100644 --- a/src/radical/pilot/tmgr/staging_input/default.py +++ b/src/radical/pilot/tmgr/staging_input/default.py @@ -297,7 +297,7 @@ def _handle_task(self, task, actionables): self._prof.prof("create_sandbox_start", uid=uid) - src_context = {'pwd' : os.getcwd(), # !!! + src_context = {'pwd' : task['client_sandbox'], # !!! 'client' : task['client_sandbox'], 'task' : task['task_sandbox'], 'pilot' : task['pilot_sandbox'], diff --git a/src/radical/pilot/tmgr/staging_output/default.py b/src/radical/pilot/tmgr/staging_output/default.py index d201518cf5..1f62954c5a 100644 --- a/src/radical/pilot/tmgr/staging_output/default.py +++ b/src/radical/pilot/tmgr/staging_output/default.py @@ -115,7 +115,7 @@ def _handle_task(self, task, actionables): 'session' : task['session_sandbox'], 'resource' : task['resource_sandbox'], 'endpoint' : task['endpoint_fs']} - tgt_context = {'pwd' : os.getcwd(), # !!! + tgt_context = {'pwd' : task['client_sandbox'], # !!! 'client' : task['client_sandbox'], 'task' : task['task_sandbox'], 'pilot' : task['pilot_sandbox'], From 022c915eacba27e0b0cb1740e6415daad9c06bf1 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sat, 13 May 2023 20:26:51 +0200 Subject: [PATCH 044/171] reorder agent startup - reorder config steps - fix examples - let raptor use registry --- bin/radical-pilot-agent | 18 +- bin/radical-pilot-component | 2 +- examples/12_task_env.py | 306 ++++++++------- examples/misc/raptor.cfg | 8 +- examples/misc/raptor.py | 7 - examples/misc/raptor_master.py | 13 +- requirements.txt | 2 +- src/radical/pilot/agent/agent_0.py | 62 ++- src/radical/pilot/agent/agent_n.py | 10 - src/radical/pilot/agent/executing/base.py | 23 +- src/radical/pilot/agent/executing/flux.py | 2 +- src/radical/pilot/agent/executing/popen.py | 10 +- .../pilot/agent/resource_manager/base.py | 12 +- .../pilot/agent/resource_manager/fork.py | 2 +- src/radical/pilot/agent/scheduler/base.py | 12 +- src/radical/pilot/agent/scheduler/flux.py | 5 +- src/radical/pilot/db/database.py | 39 +- src/radical/pilot/pmgr/launching/base.py | 2 +- src/radical/pilot/raptor/master.py | 63 +-- src/radical/pilot/raptor/worker.py | 17 +- src/radical/pilot/raptor/worker_default.py | 3 +- src/radical/pilot/session.py | 365 ++++++++++-------- src/radical/pilot/staging_directives.py | 6 +- src/radical/pilot/utils/component.py | 27 +- 24 files changed, 522 insertions(+), 494 deletions(-) diff --git a/bin/radical-pilot-agent b/bin/radical-pilot-agent index 33cec7c9a1..3952af543b 100755 --- a/bin/radical-pilot-agent +++ b/bin/radical-pilot-agent @@ -40,13 +40,19 @@ def bootstrap_3(aid): setproctitle.setproctitle('rp.%s' % aid) cfg = ru.Config(path='%s.cfg' % aid) - cfg.uid = aid - cfg.aid = aid # used by executor - cfg.path = os.getcwd() - cfg.base = os.getcwd() - # start a non-primary session (the agents will own their cmgrs) - session = rp.Session(cfg=cfg, _primary=False) + # this script runs the registry for the agent instances + reg_service = ru.zmq.Registry(uid=aid + '.reg') + reg_service.start() + + cfg.reg_addr = reg_service.addr + + assert cfg.reg_addr + + session = rp.Session(uid=cfg.sid, _reg_addr=cfg.reg_addr, _primary=False) + + cfg.uid = aid + cfg.aid = aid # used by executor if aid == 'agent.0': agent = rp.Agent_0(cfg, session) else : agent = rp.Agent_n(cfg, session) diff --git a/bin/radical-pilot-component b/bin/radical-pilot-component index e66df6592c..eae0938dbc 100755 --- a/bin/radical-pilot-component +++ b/bin/radical-pilot-component @@ -80,7 +80,7 @@ def wrapped_main(sid, reg_addr, 
uid, log, prof): spt.setproctitle('rp.%s' % uid) # start a non-primary session - session = rp.Session(cfg=scfg, _primary=False) + session = rp.Session(uid=sid, _primary=False, _reg_addr=reg_addr) # create the component and begin to work comp = rp.utils.Component.create(ccfg, session) diff --git a/examples/12_task_env.py b/examples/12_task_env.py index 6e69b6a0e4..4d3b4fb499 100755 --- a/examples/12_task_env.py +++ b/examples/12_task_env.py @@ -1,154 +1,152 @@ -#!/usr/bin/env python - -__copyright__ = 'Copyright 2013-2014, http://radical.rutgers.edu' -__license__ = 'MIT' - -import os -import sys - -verbose = os.environ.get('RADICAL_PILOT_VERBOSE', 'REPORT') -os.environ['RADICAL_PILOT_VERBOSE'] = verbose - -import radical.pilot as rp -import radical.utils as ru - - -# ------------------------------------------------------------------------------ -# -# READ the RADICAL-Pilot documentation: https://radicalpilot.readthedocs.io/ -# -# ------------------------------------------------------------------------------ - - -# ------------------------------------------------------------------------------ -# -if __name__ == '__main__': - - # we use a reporter class for nicer output - report = ru.Reporter(name='radical.pilot') - report.title('Getting Started (RP version %s)' % rp.version) - - # use the resource specified as argument, fall back to localhost - if len(sys.argv) > 2: report.exit('Usage:\t%s [resource]\n\n' % sys.argv[0]) - elif len(sys.argv) == 2: resource = sys.argv[1] - else : resource = 'local.localhost' - - # Create a new session. No need to try/except this: if session creation - # fails, there is not much we can do anyways... - session = rp.Session() - - # all other pilot code is now tried/excepted. If an exception is caught, we - # can rely on the session object to exist and be valid, and we can thus tear - # the whole RP stack down via a 'session.close()' call in the 'finally' - # clause... - try: - - # read the config used for resource details - report.info('read config') - config = ru.read_json('%s/config.json' % os.path.dirname(__file__)) - report.ok('>>ok\n') - - report.header('submit pilots') - - # Add a PilotManager. PilotManagers manage one or more pilots. - pmgr = rp.PilotManager(session=session) - - # Define an [n]-core local pilot that runs for [x] minutes - # Here we use a dict to initialize the description object - pd_init = {'resource' : resource, - 'runtime' : 15, # pilot runtime (min) - 'exit_on_error' : True, - 'project' : config[resource].get('project', None), - 'queue' : config[resource].get('queue', None), - 'access_schema' : config[resource].get('schema', None), - 'cores' : config[resource].get('cores', 1), - 'gpus' : config[resource].get('gpus', 0), - } - pdesc = rp.PilotDescription(pd_init) - - # Launch the pilot. - pilot = pmgr.submit_pilots(pdesc) - - pilot.prepare_env('numpy_env', {'type' : 'virtualenv', - 'version': '3.7', - 'setup' : ['numpy']}) - - - report.header('submit tasks') - - # Register the pilot in a TaskManager object. - tmgr = rp.TaskManager(session=session) - tmgr.add_pilots(pilot) - - # Create a workload of tasks. - # Each task runs '/bin/date'. - n = 2 # number of tasks to run - report.info('create %d task description(s)\n\t' % n) - - tds = list() - for i in range(0, n): - - # create a new task description, and fill it. - # Here we don't use dict initialization. 
- td = rp.TaskDescription() - td.executable = 'python3' - td.arguments = ['-c', 'import numpy; print(numpy.__file__)'] - td.named_env = 'numpy_env' - tds.append(td) - report.progress() - - report.ok('>>ok\n') - - # Submit the previously created task descriptions to the - # PilotManager. This will trigger the selected scheduler to start - # assigning tasks to the pilots. - tasks = tmgr.submit_tasks(tds) - - # Wait for all tasks to reach a final state (DONE, CANCELED or FAILED). - report.header('gather results') - tmgr.wait_tasks() - - report.info('\n') - for task in tasks: - report.plain(' * %s: %s, exit: %3s, out: %s\n' - % (task.uid, task.state[:4], - task.exit_code, task.stdout[:35])) - - # get some more details for one task: - task_dict = tasks[0].as_dict() - report.plain("task workdir : %s\n" % task_dict['task_sandbox']) - report.plain("pilot id : %s\n" % task_dict['pilot']) - report.plain("exit code : %s\n" % task_dict['exit_code']) - report.plain("stdout : %s\n" % task_dict['stdout']) - - # get some more details for one task: - task_dict = tasks[1].as_dict() - report.plain("task workdir : %s\n" % task_dict['task_sandbox']) - report.plain("pilot id : %s\n" % task_dict['pilot']) - report.plain("exit code : %s\n" % task_dict['exit_code']) - report.plain("exit stdout : %s\n" % task_dict['stdout']) - - - except Exception as e: - # Something unexpected happened in the pilot code above - report.error('caught Exception: %s\n' % e) - raise - - except (KeyboardInterrupt, SystemExit): - # the callback called sys.exit(), and we can here catch the - # corresponding KeyboardInterrupt exception for shutdown. We also catch - # SystemExit (which gets raised if the main threads exits for some other - # reason). - report.warn('exit requested\n') - - finally: - # always clean up the session, no matter if we caught an exception or - # not. This will kill all remaining pilots. - report.header('finalize') - session.close() - - report.header() - - -# ------------------------------------------------------------------------------ - +#!/usr/bin/env python3 + +__copyright__ = 'Copyright 2013-2014, http://radical.rutgers.edu' +__license__ = 'MIT' + +import os +import sys + +verbose = os.environ.get('RADICAL_PILOT_VERBOSE', 'REPORT') +os.environ['RADICAL_PILOT_VERBOSE'] = verbose + +import radical.pilot as rp +import radical.utils as ru + + +# ------------------------------------------------------------------------------ +# +# READ the RADICAL-Pilot documentation: https://radicalpilot.readthedocs.io/ +# +# ------------------------------------------------------------------------------ + + +# ------------------------------------------------------------------------------ +# +if __name__ == '__main__': + + # we use a reporter class for nicer output + report = ru.Reporter(name='radical.pilot') + report.title('Getting Started (RP version %s)' % rp.version) + + # use the resource specified as argument, fall back to localhost + if len(sys.argv) > 2: report.exit('Usage:\t%s [resource]\n\n' % sys.argv[0]) + elif len(sys.argv) == 2: resource = sys.argv[1] + else : resource = 'local.localhost' + + # Create a new session. No need to try/except this: if session creation + # fails, there is not much we can do anyways... + session = rp.Session() + + # all other pilot code is now tried/excepted. If an exception is caught, we + # can rely on the session object to exist and be valid, and we can thus tear + # the whole RP stack down via a 'session.close()' call in the 'finally' + # clause... 
+ try:
+
+ # read the config used for resource details
+ report.info('read config')
+ config = ru.read_json('%s/config.json' % os.path.dirname(__file__))
+ report.ok('>>ok\n')
+
+ report.header('submit pilots')
+
+ # Add a PilotManager. PilotManagers manage one or more pilots.
+ pmgr = rp.PilotManager(session=session)
+
+ # Define an [n]-core local pilot that runs for [x] minutes
+ # Here we use a dict to initialize the description object
+ pd_init = {'resource' : resource,
+ 'runtime' : 15, # pilot runtime (min)
+ 'exit_on_error' : True,
+ 'project' : config[resource].get('project', None),
+ 'queue' : config[resource].get('queue', None),
+ 'access_schema' : config[resource].get('schema', None),
+ 'cores' : config[resource].get('cores', 1),
+ 'gpus' : config[resource].get('gpus', 0),
+ }
+ pdesc = rp.PilotDescription(pd_init)
+
+ # Launch the pilot.
+ pilot = pmgr.submit_pilots(pdesc)
+
+ pilot.prepare_env('numpy_env', {'type' : 'virtualenv',
+ 'setup': ['numpy']})
+
+ report.header('submit tasks')
+
+ # Register the pilot in a TaskManager object.
+ tmgr = rp.TaskManager(session=session)
+ tmgr.add_pilots(pilot)
+
+ # Create a workload of tasks.
+ # Each task imports numpy and prints its location.
+ n = 2 # number of tasks to run
+ report.info('create %d task description(s)\n\t' % n)
+
+ tds = list()
+ for i in range(0, n):
+
+ # create a new task description, and fill it.
+ # Here we don't use dict initialization.
+ td = rp.TaskDescription()
+ td.executable = 'python3'
+ td.arguments = ['-c', 'import numpy; print(numpy.__file__)']
+ td.named_env = 'numpy_env'
+ tds.append(td)
+ report.progress()
+
+ report.ok('>>ok\n')
+
+ # Submit the previously created task descriptions to the
+ # TaskManager. This will trigger the selected scheduler to start
+ # assigning tasks to the pilots.
+ tasks = tmgr.submit_tasks(tds)
+
+ # Wait for all tasks to reach a final state (DONE, CANCELED or FAILED).
+ report.header('gather results')
+ tmgr.wait_tasks()
+
+ report.info('\n')
+ for task in tasks:
+ report.plain(' * %s: %s, exit: %3s, out: %s\n'
+ % (task.uid, task.state[:4],
+ task.exit_code, task.stdout[:35]))
+
+ # get some more details for one task:
+ task_dict = tasks[0].as_dict()
+ report.plain("task workdir : %s\n" % task_dict['task_sandbox'])
+ report.plain("pilot id : %s\n" % task_dict['pilot'])
+ report.plain("exit code : %s\n" % task_dict['exit_code'])
+ report.plain("stdout : %s\n" % task_dict['stdout'])
+
+ # get some more details for one task:
+ task_dict = tasks[1].as_dict()
+ report.plain("task workdir : %s\n" % task_dict['task_sandbox'])
+ report.plain("pilot id : %s\n" % task_dict['pilot'])
+ report.plain("exit code : %s\n" % task_dict['exit_code'])
+ report.plain("stdout : %s\n" % task_dict['stdout'])
+
+
+ except Exception as e:
+ # Something unexpected happened in the pilot code above
+ report.error('caught Exception: %s\n' % e)
+ raise
+
+ except (KeyboardInterrupt, SystemExit):
+ # the callback called sys.exit(), and here we can catch the
+ # corresponding KeyboardInterrupt exception for shutdown. We also catch
+ # SystemExit (which gets raised if the main thread exits for some other
+ # reason).
+ report.warn('exit requested\n')
+
+ finally:
+ # always clean up the session, no matter if we caught an exception or
+ # not. This will kill all remaining pilots.
+ report.header('finalize') + session.close() + + report.header() + + +# ------------------------------------------------------------------------------ + diff --git a/examples/misc/raptor.cfg b/examples/misc/raptor.cfg index efe83e4f42..95116dd423 100644 --- a/examples/misc/raptor.cfg +++ b/examples/misc/raptor.cfg @@ -33,14 +33,14 @@ }, "master_descr": { - "mode" : "task.raptor_master", - "named_env" : "ve_raptor", + "mode" : "raptor.master", + "named_env" : "rp", "executable" : "./raptor_master.py" }, "worker_descr": { - "mode" : "task.raptor_worker", - "named_env" : "ve_raptor", + "mode" : "raptor.worker", + "named_env" : "rp", # custom worker class "raptor_class" : "MyWorker", diff --git a/examples/misc/raptor.py b/examples/misc/raptor.py index 4ad1c0dfc4..34abb42ba4 100755 --- a/examples/misc/raptor.py +++ b/examples/misc/raptor.py @@ -141,12 +141,6 @@ def task_state_cb(task, state): # radical.pilot and radical.utils from sdist archives on the local # filesystem. This only works for the default resource, local.localhost. report.info('Call pilot.prepare_env()... ') - pilot.prepare_env(env_name='ve_raptor', - env_spec={'type' : 'venv', - 'path' : '/tmp/ve3', - 'setup': [rp.sdist_path, - ru.sdist_path, - 'mpi4py']}) report.info('done\n') # Launch a raptor master task, which will launch workers and self-submit @@ -164,7 +158,6 @@ def task_state_cb(task, state): td.arguments = [cfg_file, i] td.cpu_processes = 1 td.cpu_threads = cores_per_master - td.named_env = 'rp' td.input_staging = [{'source': '%s/raptor_master.py' % PWD, 'target': 'raptor_master.py', 'action': rp.TRANSFER, diff --git a/examples/misc/raptor_master.py b/examples/misc/raptor_master.py index 78d8b97f3c..ce96de522d 100755 --- a/examples/misc/raptor_master.py +++ b/examples/misc/raptor_master.py @@ -65,7 +65,7 @@ class MyMaster(rp.raptor.Master): # -------------------------------------------------------------------------- # - def __init__(self, cfg): + def __init__(self, cfg: ru.Config): self._cnt = 0 self._submitted = defaultdict(int) @@ -73,9 +73,11 @@ def __init__(self, cfg): # initialize the task overlay base class. That base class will ensure # proper communication channels to the pilot agent. + ru.write_json('m1.json', cfg) super().__init__(cfg=cfg) + ru.write_json('m2.json', self._cfg) - self._sleep = self._cfg.sleep + self._sleep = self._raptor_cfg.sleep # -------------------------------------------------------------------------- @@ -266,7 +268,6 @@ def result_cb(self, tasks): cores_per_node = cfg.cores_per_node gpus_per_node = cfg.gpus_per_node descr = cfg.worker_descr - pwd = os.getcwd() # one node is used by master. Alternatively (and probably better), we could # reduce one of the worker sizes by one core. But it somewhat depends on @@ -280,7 +281,6 @@ def result_cb(self, tasks): # insert `n` worker tasks into the agent. The agent will schedule (place) # those workers and execute them. Insert one smaller worker (see above) # NOTE: this assumes a certain worker size / layout - out('workers: %d' % n_workers) descr['ranks'] = nodes_per_worker * cores_per_node descr['gpus_per_rank'] = nodes_per_worker * gpus_per_node worker_ids = master.submit_workers( @@ -292,22 +292,17 @@ def result_cb(self, tasks): # FIXME master.wait_workers(count=1) - out('start') master.start() - out('submit') master.submit() # let some time pass for client side tasks to complete time.sleep(60) - out('stop') # TODO: can be run from thread? 
master.stop() - out('join') # TODO: worker state callback master.join() - out('done') # TODO: expose RPC hooks diff --git a/requirements.txt b/requirements.txt index 1556c9f9e6..915ce6e70c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -radical.utils>=1.12 +radical.utils>=1.34 radical.saga>=1.12 radical.gtod pymongo<4 diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index e2ef3dcc84..eb3c41ad14 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -44,7 +44,6 @@ class Agent_0(rpu.Worker): def __init__(self, cfg: ru.Config, session: Session): self._uid = 'agent.0' - self._cfg = cfg self._pid = cfg.pid self._sid = cfg.sid self._pmgr = cfg.pmgr @@ -52,25 +51,34 @@ def __init__(self, cfg: ru.Config, session: Session): self._session = session self._log = ru.Logger(self._uid, ns='radical.pilot') + self._starttime = time.time() self._final_cause = None + # extract bridges, components and resource_cfg subsections from the cfg + self._bcfg = cfg.bridges + self._ccfg = cfg.components + self._rcfg = cfg.resource_cfg + + del cfg['bridges'] + del cfg['components'] + del cfg['resource_cfg'] + + # keep some state about service startups + self._service_uids_launched = list() + self._service_uids_running = list() + self._services_setup = mt.Event() + # this is the earliest point to sync bootstrap and agent profiles self._prof = ru.Profiler(ns='radical.pilot', name=self._uid) self._prof.prof('hostname', uid=cfg.pid, msg=ru.get_hostname()) - # run an inline registry service to share runtime config with other - # agents and components - reg_uid = 'radical.pilot.reg.%s' % self._uid - self._reg_service = ru.zmq.Registry(uid=reg_uid) - self._reg_service.start() - self._reg_addr = self._reg_service.addr + # init the worker / component base classes, connects registry + rpu.Worker.__init__(self, cfg, session) - self._reg = ru.zmq.RegistryClient(url=self._reg_addr, pwd=self._sid) + # store the agent config in the registry self._reg['cfg'] = self._cfg - - # let all components know where to look for the registry - self._cfg['reg_addr'] = self._reg_addr + self._reg['rcfg'] = self._rcfg # connect to MongoDB for state push/pull self._connect_db() @@ -82,23 +90,13 @@ def __init__(self, cfg: ru.Config, session: Session): # ensure that app communication channels are visible to workload self._configure_app_comm() - # expose heartbeat channel to sub-agents, bridges and components, - # and start those - self._cmgr = rpu.ComponentManager(self._sid, self._reg_addr, self._uid) - self._cfg.heartbeat = self._reg['cfg']['heartbeat'] - - ccfg = {'pid': self._pid, - 'log_lvl': 'debug'} # FIXME + # ready to configure agent components + self._cmgr = rpu.ComponentManager(self._cfg.sid, self._cfg.reg_addr, + self._uid) - self._cmgr.start_bridges(self._cfg.bridges) - self._cmgr.start_components(self._cfg.components, ccfg) + self._cmgr.start_bridges(self._bcfg) + self._cmgr.start_components(self._ccfg) - # service tasks uids, which were launched - self._service_uids_launched = list() - # service tasks uids, which were confirmed to be started - self._service_uids_running = list() - # set flag when all services are running - self._services_setup = mt.Event() # create the sub-agent configs and start the sub agents self._write_sa_configs() @@ -107,7 +105,6 @@ def __init__(self, cfg: ru.Config, session: Session): # at this point the session is up and connected, and it should have # brought up all communication bridges and components. 
We are # ready to rumble! - rpu.Worker.__init__(self, self._cfg, session) self.register_subscriber(rpc.CONTROL_PUBSUB, self._check_control) self.register_subscriber(rpc.STATE_PUBSUB, self._service_state_cb) @@ -141,7 +138,7 @@ def _hb_check(self): # def _hb_term_cb(self, msg=None): - self._cmgr.close() + self._session.close() self._log.warn('hb termination: %s', msg) return None @@ -165,6 +162,7 @@ def _connect_db(self): self._dbs = DBSession(sid=self._cfg.sid, dburl=self._cfg.dburl, log=self._log) + # -------------------------------------------------------------------------- # def _configure_rm(self): @@ -174,8 +172,8 @@ def _configure_rm(self): # information to the config, for the benefit of the scheduler). self._rm = ResourceManager.create(name=self._cfg.resource_manager, - cfg=self._cfg, log=self._log, - prof=self._prof) + cfg=self._cfg, rcfg=self._rcfg, + log=self._log, prof=self._prof) self._log.debug(pprint.pformat(self._rm.info)) @@ -286,13 +284,11 @@ def finalize(self): # tear things down in reverse order self._hb.stop() - self._cmgr.close() + self._session.close() if self._rm: self._rm.stop() - self._reg_service.stop() - if self._final_cause == 'timeout' : state = rps.DONE elif self._final_cause == 'cancel' : state = rps.CANCELED elif self._final_cause == 'sys.exit' : state = rps.CANCELED diff --git a/src/radical/pilot/agent/agent_n.py b/src/radical/pilot/agent/agent_n.py index d46580f444..f54c0df90c 100644 --- a/src/radical/pilot/agent/agent_n.py +++ b/src/radical/pilot/agent/agent_n.py @@ -37,20 +37,10 @@ def __init__(self, cfg, session): self._starttime = time.time() self._final_cause = None - self._reg = ru.zmq.RegistryClient(url=self._reg_addr, pwd=self._sid) - # this is the earliest point to sync bootstrap and agent profiles self._prof.prof('hostname', uid=self._pid, msg=ru.get_hostname()) self._prof.prof('sub_agent_start', uid=self._pid) - # expose heartbeat channel to sub-agents, bridges and components, - # and start those - self._cmgr = rpu.ComponentManager(self._reg_addr, self._sid, self._uid) - self._cfg.heartbeat = self._reg['cfg']['heartbeat'] - - self._cmgr.start_bridges(self._cfg.bridges) - self._cmgr.start_components(self._cfg.components) - # at this point the session is up and connected, and it should have # brought up all communication bridges and components. We are # ready to rumble! 
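The hunks before and after this point all move in the same direction: components stop reading resource details from private config files and instead pull them from the session registry which agent.0 populates. A minimal sketch of that lookup pattern, assuming the `ru.zmq.RegistryClient` semantics used in these hunks (the two environment variables are the ones the popen executor exports into each task environment, and the `cfg` / `rcfg` key layout is the one agent.0 stores in the hunk above):

    import os
    import radical.utils as ru

    # connect to the registry service owned by the agent; the endpoint is
    # exported as RP_REGISTRY_ADDRESS, the session ID serves as password
    reg = ru.zmq.RegistryClient(url=os.environ['RP_REGISTRY_ADDRESS'],
                                pwd=os.environ['RP_SESSION_ID'])

    scfg = ru.Config(cfg=reg['cfg'])     # session config stored by agent.0
    rcfg = ru.Config(cfg=reg['rcfg'])    # resource config stored by agent.0

    # dotted keys resolve into nested entries, as the factories use them
    spawner = reg['rcfg.agent_spawner']  # e.g., 'POPEN'

    reg.close()

This illustrates the access pattern only; in the patches themselves the client is created by the component base class or, for raptor workers, in `Worker.__init__`.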
diff --git a/src/radical/pilot/agent/executing/base.py b/src/radical/pilot/agent/executing/base.py index e93fbbe607..e0baeedfaf 100644 --- a/src/radical/pilot/agent/executing/base.py +++ b/src/radical/pilot/agent/executing/base.py @@ -50,7 +50,7 @@ def create(cls, cfg, session): if cls != AgentExecutingComponent: raise TypeError('Factory only available to base class!') - name = session.cfg.resource_cfg.agent_spawner + name = session._reg['rcfg.agent_spawner'] from .popen import Popen from .flux import Flux @@ -72,27 +72,24 @@ def create(cls, cfg, session): # def initialize(self): - session_cfg = ru.Config(cfg=self._reg['cfg']) - resource_cfg = ru.Config(cfg=session_cfg['resource_cfg']) + scfg = ru.Config(cfg=self._reg['cfg']) + rcfg = ru.Config(cfg=self._reg['rcfg']) - # the resource manager needs to connect to the registry - resource_cfg.reg_addr = self._cfg.reg_addr - - rm_name = resource_cfg['resource_manager'] - self._rm = rpa.ResourceManager.create(rm_name, resource_cfg, + rm_name = rcfg['resource_manager'] + self._rm = rpa.ResourceManager.create(rm_name, scfg, rcfg, self._log, self._prof) self._pwd = os.path.realpath(os.getcwd()) self.sid = self._cfg['sid'] - self.resource = session_cfg['resource'] - self.rsbox = session_cfg['resource_sandbox'] - self.ssbox = session_cfg['session_sandbox'] - self.psbox = session_cfg['pilot_sandbox'] + self.resource = scfg['resource'] + self.rsbox = scfg['resource_sandbox'] + self.ssbox = scfg['session_sandbox'] + self.psbox = scfg['pilot_sandbox'] self.gtod = '$RP_PILOT_SANDBOX/gtod' self.prof = '$RP_PILOT_SANDBOX/prof' # if so configured, let the tasks know what to use as tmp dir - self._task_tmp = resource_cfg.get('task_tmp', + self._task_tmp = rcfg.get('task_tmp', os.environ.get('TMP', '/tmp')) if self.psbox.startswith(self.ssbox): diff --git a/src/radical/pilot/agent/executing/flux.py b/src/radical/pilot/agent/executing/flux.py index be7ba0c31b..569f2af9b3 100644 --- a/src/radical/pilot/agent/executing/flux.py +++ b/src/radical/pilot/agent/executing/flux.py @@ -120,7 +120,7 @@ def work(self, tasks): # def _listen(self): - lm_cfg = self._cfg.resource_cfg.launch_methods.get('FLUX') + lm_cfg = self._reg['rcfg.launch_methods'].get('FLUX') lm_cfg['pid'] = self._cfg.pid lm_cfg['reg_addr'] = self._cfg.reg_addr lm = LaunchMethod.create('FLUX', lm_cfg, self._cfg, diff --git a/src/radical/pilot/agent/executing/popen.py b/src/radical/pilot/agent/executing/popen.py index 9fe1271a42..a76915bba9 100644 --- a/src/radical/pilot/agent/executing/popen.py +++ b/src/radical/pilot/agent/executing/popen.py @@ -66,7 +66,7 @@ def initialize(self): self._watch_queue = queue.Queue() - self._pid = self._cfg['pid'] + self._pid = self._reg['cfg.pid'] # run watcher thread self._watcher = mt.Thread(target=self._watch) @@ -227,9 +227,6 @@ def _handle_task(self, task): ru.rec_makedir(sbox) - if td['mode'] in [RAPTOR_MASTER, RAPTOR_WORKER]: - ru.write_json('%s/%s.json' % (sbox, tid), td) - with ru.ru_open('%s/%s' % (sbox, launch_script), 'w') as fout: tmp = '' @@ -348,8 +345,7 @@ def _handle_task(self, task): # `start_new_session=True` is default, which enables decoupling # from the parent process group (part of the task cancellation) - _start_new_session = self._session.cfg['resource_cfg'].\ - get('new_session_per_task', False) + _start_new_session = self._reg['rcfg.new_session_per_task'] or False self._prof.prof('task_run_start', uid=tid) task['proc'] = sp.Popen(args = cmdline, @@ -557,11 +553,11 @@ def _get_rp_env(self, task): ret += 'export RP_SESSION_SANDBOX="%s"\n' % 
self.ssbox ret += 'export RP_PILOT_SANDBOX="%s"\n' % self.psbox ret += 'export RP_TASK_SANDBOX="%s"\n' % sbox + ret += 'export RP_REGISTRY_ADDRESS="%s"\n' % self._session.reg_addr # FIXME AM # ret += 'export RP_LFS="%s"\n' % self.lfs ret += 'export RP_GTOD="%s"\n' % self.gtod ret += 'export RP_PROF="%s"\n' % self.prof - # ret += 'export RP_REGISTRY_URL="%s"\n' % self.reg_addr if self._prof.enabled: ret += 'export RP_PROF_TGT="%s/%s.prof"\n' % (sbox, tid) diff --git a/src/radical/pilot/agent/resource_manager/base.py b/src/radical/pilot/agent/resource_manager/base.py index 08a3581911..77d74dbd2d 100644 --- a/src/radical/pilot/agent/resource_manager/base.py +++ b/src/radical/pilot/agent/resource_manager/base.py @@ -123,10 +123,11 @@ class ResourceManager(object): # -------------------------------------------------------------------------- # - def __init__(self, cfg, log, prof): + def __init__(self, cfg, rcfg, log, prof): self.name = type(self).__name__ self._cfg = cfg + self._rcfg = rcfg self._log = log self._prof = prof @@ -309,8 +310,7 @@ def init_from_scratch(self): raise RuntimeError('ResourceManager has no nodes left to run tasks') # add launch method information to rm_info - print('1 ============', self._cfg.resource_cfg.launch_methods) - rm_info.launch_methods = self._cfg.resource_cfg.launch_methods + rm_info.launch_methods = self._rcfg.launch_methods return rm_info @@ -319,8 +319,6 @@ def init_from_scratch(self): # def _prepare_launch_methods(self, rm_info): - import pprint - print('2 ============', pprint.pformat(rm_info.as_dict())) launch_methods = self._rm_info.launch_methods self._launchers = {} self._launch_order = launch_methods.get('order') or list(launch_methods) @@ -368,7 +366,7 @@ def get_partitions(self): # ResourceManager. # @classmethod - def create(cls, name, cfg, log, prof): + def create(cls, name, cfg, rcfg, log, prof): from .ccm import CCM from .fork import Fork @@ -399,7 +397,7 @@ def create(cls, name, cfg, log, prof): if name not in impl: raise RuntimeError('ResourceManager %s unknown' % name) - return impl[name](cfg, log, prof) + return impl[name](cfg, rcfg, log, prof) diff --git a/src/radical/pilot/agent/resource_manager/fork.py b/src/radical/pilot/agent/resource_manager/fork.py index 92c1f688d5..9b068403fc 100644 --- a/src/radical/pilot/agent/resource_manager/fork.py +++ b/src/radical/pilot/agent/resource_manager/fork.py @@ -20,7 +20,7 @@ def _init_from_scratch(self, rm_info: RMInfo) -> RMInfo: if not rm_info.cores_per_node: rm_info.cores_per_node = detected_cores - if self._cfg.resource_cfg.fake_resources: + if self._rcfg.fake_resources: self._log.info( 'virtual resource with %d cores per node (%d detected cores)' % (rm_info.cores_per_node, detected_cores)) diff --git a/src/radical/pilot/agent/scheduler/base.py b/src/radical/pilot/agent/scheduler/base.py index 0245943e39..a81e5d4e4c 100644 --- a/src/radical/pilot/agent/scheduler/base.py +++ b/src/radical/pilot/agent/scheduler/base.py @@ -216,14 +216,14 @@ def initialize(self): # The scheduler needs the ResourceManager information which have been # collected during agent startup. 
- session_cfg = ru.Config(cfg=self._reg['cfg']) - resource_cfg = ru.Config(cfg=session_cfg['resource_cfg']) + scfg = ru.Config(cfg=self._reg['cfg']) + rcfg = ru.Config(cfg=self._reg['rcfg']) # the resource manager needs to connect to the registry - resource_cfg.reg_addr = self._cfg.reg_addr + rcfg.reg_addr = self._cfg.reg_addr - rm_name = resource_cfg['resource_manager'] - self._rm = ResourceManager.create(rm_name, resource_cfg, + rm_name = rcfg['resource_manager'] + self._rm = ResourceManager.create(rm_name, scfg, rcfg, self._log, self._prof) self._partitions = self._rm.get_partitions() # {plabel : [node_ids]} @@ -293,7 +293,7 @@ def create(cls, cfg, session): if cls != AgentSchedulingComponent: raise TypeError("Scheduler Factory only available to base class!") - name = session.cfg.resource_cfg.agent_scheduler + name = session._reg['rcfg.agent_scheduler'] from .continuous_ordered import ContinuousOrdered from .continuous_colo import ContinuousColo diff --git a/src/radical/pilot/agent/scheduler/flux.py b/src/radical/pilot/agent/scheduler/flux.py index e600ec2f5f..59d6304fe7 100644 --- a/src/radical/pilot/agent/scheduler/flux.py +++ b/src/radical/pilot/agent/scheduler/flux.py @@ -69,11 +69,10 @@ def _configure(self): # performed in retrospect by the executor, based on the scheduling and # execution events collected from Flux. qname = rpc.AGENT_EXECUTING_QUEUE - fname = '%s/%s.cfg' % (self._cfg.path, qname) - cfg = ru.read_json(fname) + cfg = self._reg['bridges.%s' % qname] self._q = ru.zmq.Putter(qname, cfg['put']) - lm_cfg = self._cfg.resource_cfg.launch_methods.get('FLUX') + lm_cfg = self._reg['rcfg.launch_methods'].get('FLUX') lm_cfg['pid'] = self._cfg.pid lm_cfg['reg_addr'] = self._cfg.reg_addr self._lm = LaunchMethod.create('FLUX', lm_cfg, self._cfg, diff --git a/src/radical/pilot/db/database.py b/src/radical/pilot/db/database.py index 5db74ac314..81f833ed1d 100644 --- a/src/radical/pilot/db/database.py +++ b/src/radical/pilot/db/database.py @@ -32,15 +32,16 @@ def __init__(self, sid, dburl, log, connect=True): tasks : document describing a rp.Task ''' - self._dburl = dburl - self._log = log - self._mongo = None - self._db = None - self._created = time.time() - self._connected = None - self._closed = None - self._c = None - self._can_remove = False + self._dburl = dburl + self._log = log + self._mongo = None + self._db = None + self._created = time.time() + self._connected = None + self._reconnected = None + self._closed = None + self._c = None + self._can_remove = False if not connect: return @@ -74,7 +75,8 @@ def __init__(self, sid, dburl, log, connect=True): 'uid' : sid, 'created' : self._created, 'connected' : self._connected}) - self._can_remove = True + self._can_remove = True + self._reconnected = False else: docs = self._c.find({'type' : 'session', @@ -83,9 +85,10 @@ def __init__(self, sid, dburl, log, connect=True): raise ValueError('cannot reconnect to session %s' % sid) doc = docs[0] - self._can_delete = False - self._created = doc['created'] - self._connected = time.time() + self._can_delete = False + self._created = doc['created'] + self._connected = time.time() + self._reconnected = True # FIXME: get bridge addresses from DB? If not, from where? @@ -129,6 +132,16 @@ def connected(self): return self._connected + # -------------------------------------------------------------------------- + # + @property + def reconnected(self): + ''' + Returns boolean indicating if the session was reconnected (vs. 
created)
 '''
 return self._reconnected

 # --------------------------------------------------------------------------
 #
 @property

diff --git a/src/radical/pilot/pmgr/launching/base.py b/src/radical/pilot/pmgr/launching/base.py
index 087ea609f0..eb789ea25b 100644
--- a/src/radical/pilot/pmgr/launching/base.py
+++ b/src/radical/pilot/pmgr/launching/base.py
@@ -851,7 +851,7 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name):
 for arg in pre_bootstrap_0: bs_args.extend(['-e', arg])
 for arg in pre_bootstrap_1: bs_args.extend(['-w', arg])
- agent_cfg['owner'] = 'agent.0'
+ agent_cfg['owner'] = pid
 agent_cfg['resource'] = resource
 agent_cfg['nodes'] = requested_nodes
 agent_cfg['cores'] = allocated_cores

diff --git a/src/radical/pilot/raptor/master.py b/src/radical/pilot/raptor/master.py
index 88db7a3150..d0e5b9f5a7 100644
--- a/src/radical/pilot/raptor/master.py
+++ b/src/radical/pilot/raptor/master.py
@@ -30,8 +30,9 @@ class Master(rpu.Component):
 # --------------------------------------------------------------------------
 #
- def __init__(self, cfg=None):
+ def __init__(self, cfg: ru.Config = None):
+ self._raptor_cfg = cfg or ru.Config()
 self._uid = os.environ['RP_TASK_ID']
 self._pid = os.environ['RP_PILOT_ID']
 self._sid = os.environ['RP_SESSION_ID']
@@ -40,6 +41,10 @@ def __init__(self, cfg=None):
 self._psbox = os.environ['RP_PILOT_SANDBOX']
 self._ssbox = os.environ['RP_SESSION_SANDBOX']
 self._rsbox = os.environ['RP_RESOURCE_SANDBOX']
+ self._reg_addr = os.environ['RP_REGISTRY_ADDRESS']
+
+ self._reg = ru.zmq.RegistryClient(url=self._reg_addr,
+ pwd=self._sid)
 self._workers = dict() # wid: worker
 self._tasks = dict() # bookkeeping of submitted requests
@@ -50,13 +55,18 @@ def __init__(self, cfg=None):
 self._hb_freq = 10 # check worker heartbeats every n seconds
 self._hb_timeout = 15 # consider worker dead after 15 seconds
- cfg = self._get_config(cfg)
- self._session = Session(cfg=cfg, uid=cfg.sid, _primary=False)
+ self._session = Session(uid=self._sid, _reg_addr=self._reg_addr,
+ _primary=False)
 self._rpc_handlers = dict()
 self.register_rpc_handler('stop', self.stop)
- rpu.Component.__init__(self, cfg, self._session)
+ ccfg = ru.Config(from_dict={'uid' : self._uid,
+ 'sid' : self._sid,
+ 'owner' : self._pid,
+ 'reg_addr': self._reg_addr})
+
+ rpu.Component.__init__(self, ccfg, self._session)
 self.register_publisher(rpc.STATE_PUBSUB)
 self.register_publisher(rpc.CONTROL_PUBSUB)
@@ -99,8 +109,8 @@ def __init__(self, cfg=None):
 'stall_hwm' : 0,
 'bulk_size' : 1})
- self._req_queue = ru.zmq.Queue(req_cfg)
- self._res_queue = ru.zmq.Queue(res_cfg)
+ self._req_queue = ru.zmq.Queue('raptor_tasks', cfg=req_cfg)
+ self._res_queue = ru.zmq.Queue('raptor_results', cfg=res_cfg)
 self._req_queue.start()
 self._res_queue.start()
@@ -162,38 +172,6 @@ def register_rpc_handler(self, cmd, handler):
 self._rpc_handlers[cmd] = handler
- # --------------------------------------------------------------------------
- #
- def _get_config(self, cfg=None):
- '''
- derive a worker base configuration from the control pubsub configuration
- '''
-
- # FIXME: use registry for comm EP info exchange, not cfg files
-
- if cfg is None:
- cfg = dict()
-
- if cfg and 'path' in cfg:
- del cfg['path']
-
- ru.dict_merge(cfg, ru.read_json('%s/control_pubsub.json' % self._psbox))
-
- del cfg['channel']
- del cfg['cmgr']
-
- cfg['log_lvl'] = 'warn'
- cfg['kind'] = 'master'
- cfg['sid'] = self._sid
- cfg['path'] = self._sbox
- cfg['base'] = os.environ['RP_PILOT_SANDBOX']
-
- cfg = ru.Config(cfg=cfg)
- cfg['uid'] = self._uid
-
- return cfg
-
-
 # --------------------------------------------------------------------------
 #
 @property
@@ -370,12 +348,10 @@ def submit_workers(self, descriptions: List[TaskDescription]
 needed.
 '''
- # FIXME registry: use registry instead of config files
-
 tasks = list()
 for td in descriptions:
- if not td.mode == RAPTOR_WORKER:
+ if td.mode != RAPTOR_WORKER:
 raise ValueError('unexpected task mode [%s]' % td.mode)
 # sharing GPUs among multiple ranks not yet supported
@@ -390,7 +366,6 @@ def submit_workers(self, descriptions: List[TaskDescription]
 if not td.get('uid'):
 td.uid = '%s.%s' % (self.uid, ru.generate_id('worker', ns=self.uid))
-
 if not td.get('executable'):
 td.executable = 'radical-pilot-raptor-worker'
@@ -405,6 +380,10 @@ def submit_workers(self, descriptions: List[TaskDescription]
 # ensure that defaults and backward compatibility kick in
 td.verify()
+ # the default worker needs its own task description to derive the
+ # amount of available resources
+ self._reg['raptor.%s.cfg' % self._uid] = td.as_dict()
+
 # all workers run in the same sandbox as the master
 task = dict()

diff --git a/src/radical/pilot/raptor/worker.py b/src/radical/pilot/raptor/worker.py
index f07c263f76..de78b363f9 100644
--- a/src/radical/pilot/raptor/worker.py
+++ b/src/radical/pilot/raptor/worker.py
@@ -33,19 +33,28 @@ def __init__(self, manager, rank, raptor_id):
 self._rank = rank
 self._raptor_id = raptor_id
 self._reg_event = mt.Event()
+ self._reg_addr = os.environ['RP_REGISTRY_ADDRESS']
 self._sbox = os.environ['RP_TASK_SANDBOX']
 self._uid = os.environ['RP_TASK_ID']
+ self._sid = os.environ['RP_SESSION_ID']
 self._ranks = int(os.environ['RP_RANKS'])
+ self._reg = ru.zmq.RegistryClient(url=self._reg_addr,
+ pwd=self._sid)
+
+ self._cfg = ru.Config(cfg=self._reg['cfg'])
+
 self._log = ru.Logger(name=self._uid, ns='radical.pilot.worker',
- level='DEBUG', targets=['.'], path=self._sbox)
+ level=self._cfg.log_lvl,
+ targets=self._cfg.log_tgt,
+ path=self._cfg.path)
 self._prof = ru.Profiler(name=self._uid, ns='radical.pilot.worker',
- path=self._sbox)
+ path=self._cfg.path)
 # register for lifetime management messages on the control pubsub
 psbox = os.environ['RP_PILOT_SANDBOX']
- state_cfg = ru.read_json('%s/%s.cfg' % (psbox, rpc.STATE_PUBSUB))
- ctrl_cfg = ru.read_json('%s/%s.cfg' % (psbox, rpc.CONTROL_PUBSUB))
+ state_cfg = self._reg['bridges.%s' % rpc.STATE_PUBSUB]
+ ctrl_cfg = self._reg['bridges.%s' % rpc.CONTROL_PUBSUB]
 ru.zmq.Subscriber(rpc.STATE_PUBSUB, url=state_cfg['sub'],
 log=self._log, prof=self._prof, cb=self._state_cb,

diff --git a/src/radical/pilot/raptor/worker_default.py b/src/radical/pilot/raptor/worker_default.py
index 91d1a8592a..3db71720ca 100644
--- a/src/radical/pilot/raptor/worker_default.py
+++ b/src/radical/pilot/raptor/worker_default.py
@@ -46,7 +46,8 @@ def __init__(self, raptor_id : str):
 self._req_get = ru.zmq.Getter('request', self._req_addr_get,
 cb=self._request_cb)
- self._descr = ru.read_json('%s.json' % self._uid)
+ # the master should have stored our own task description in the registry
+ self._descr = self._reg['raptor.%s.cfg' % self._uid]
 # keep worker ID and rank
 self._n_cores = self._descr.get('cores_per_rank', 1)

diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py
index b2cfa01841..e3a8d577a7 100644
--- a/src/radical/pilot/session.py
+++ b/src/radical/pilot/session.py
@@ -15,6 +15,42 @@
 from . import utils as rpu
+# ------------------------------------------------------------------------------
+#
+class _CloseOptions(ru.TypedDict):
+ """Options and validation for Session.close().
+
+ Arguments:
+ cleanup (bool, optional): Remove session from MongoDB.
+ Implies *terminate=True*. (Default False.)
+ download (bool, optional): Fetch pilot profiles and database entries.
+ (Default False.)
+ terminate (bool, optional): Shut down all pilots associated with the
+ session. (Default True.)
+
+ """
+
+ _schema = {
+ 'cleanup' : bool,
+ 'download' : bool,
+ 'terminate': bool
+ }
+
+ _defaults = {
+ 'cleanup' : False,
+ 'download' : False,
+ 'terminate': True
+ }
+
+
+ # --------------------------------------------------------------------------
+ #
+ def _verify(self):
+
+ if self.get('cleanup') and not self.get('terminate'):
+ self.terminate = True
+
+
 # ------------------------------------------------------------------------------
 #
 class Session(rs.Session):
@@ -45,7 +81,7 @@ class Session(rs.Session):
 # --------------------------------------------------------------------------
 #
 def __init__(self, dburl=None, uid=None, cfg=None, _primary=True,
- _reg_addr=None, **close_options):
+ _reg_addr=None, **close_options):
 """Create a new session.
 A new Session instance is created and stored in the database.
@@ -59,8 +95,7 @@ def __init__(self, dburl=None, uid=None, cfg=None, _primary=True,
 MUST be unique - otherwise they will lead to conflicts in the
 underlying database, resulting in undefined behaviours (or worse).
- cfg (str | dict, optional): a named or instantiated configuration
- to be used for the session.
+ cfg (dict, optional): configuration to be used for the session.
 _primary (bool, optional): only sessions created by the original
 application process (via `rp.Session()`) will connect to the DB.
@@ -71,11 +106,9 @@ def __init__(self, dburl=None, uid=None, cfg=None, _primary=True,
 a new DB collection - if such a DB connection is needed, the
 component needs to establish that on its own.
- _reg_addr (`str`): primary sessions will always run a registry
- service, and the registry's address will be passed to
- non-primary sessions. If no such address is passed, a registry
- will be started even if the session is not a primary one. That
- will, for example, happen on the root session of `agent.0`.
+ _reg_addr (str, optional): Non-primary sessions will connect to the
+ registry at that endpoint and pull session config and resource
+ configurations from there.
 **close_options: If additional keyword arguments are provided, they
 will be used as the default arguments to Session.close(). (This
 """
+ self._dburl = dburl
+ self._uid = uid
+ self._cfg = ru.Config(cfg=cfg)
+ self._primary = _primary
+ self._reg_addr = _reg_addr
 self._close_options = _CloseOptions(close_options)
 # NOTE: `name` and `cfg` are overloaded, the user cannot point to
 # a predefined config and amend it at the same time. This might
 # be ok for the session, but introduces a minor API inconsistency.
- name = 'default' - if isinstance(cfg, str): - name = cfg - cfg = None - - self._dbs = None - self._closed = False - self._primary = _primary - self._reg_addr = _reg_addr - - self._pmgrs = dict() # map IDs to pmgr instances - self._tmgrs = dict() # map IDs to tmgr instances - self._cmgr = None # only primary sessions have a cmgr - - # path, client_sandbox, dburl - # bridges, components - # FIXME REG: if reg_addr, pull config, otherwise load config - self._cfg = ru.Config('radical.pilot.session', name=name, cfg=cfg) - self._rcfgs = ru.Config('radical.pilot.resource', name='*', expand=False) - for site in self._rcfgs: - for rcfg in self._rcfgs[site].values(): - for schema in rcfg.get('schemas', []): - while isinstance(rcfg.get(schema), str): - tgt = rcfg[schema] - rcfg[schema] = rcfg[tgt] + self._close_options.verify() - if _primary: + self._dbs = None + self._closed = False + self._pmgrs = dict() # map IDs to pmgr instances + self._tmgrs = dict() # map IDs to tmgr instances + self._cmgr = None # only primary sessions have a cmgr - pwd = os.getcwd() + if self._primary: + self._initialize_primary() - if not self._cfg.sid: - if uid: - self._cfg.sid = uid - else: - self._cfg.sid = ru.generate_id('rp.session', - mode=ru.ID_PRIVATE) - if not self._cfg.base: - self._cfg.base = pwd + else: + self._initialize_non_primary() + + + # cache sandboxes etc. + self._cache_lock = ru.RLock() + self._cache = {'endpoint_fs' : dict(), + 'resource_sandbox' : dict(), + 'session_sandbox' : dict(), + 'pilot_sandbox' : dict(), + 'client_sandbox' : self._cfg.client_sandbox, + 'js_shells' : dict(), + 'fs_dirs' : dict()} + + # at this point we have a DB connection, logger, etc, and are done + self._prof.prof('session_ok', uid=self._uid, msg=int(_primary)) + + + # -------------------------------------------------------------------------- + # + def _initialize_primary(self): - if not self._cfg.path: - self._cfg.path = '%s/%s' % (self._cfg.base, self._cfg.sid) + if not self._cfg: + # NOTE: we only support the default session config ATM + self._cfg = ru.Config('radical.pilot.session', name='default') - if not self._cfg.client_sandbox: - self._cfg.client_sandbox = pwd + if not self._dburl: + self._dburl = self._cfg.dburl + + if not self._dburl: + raise RuntimeError("no db URL (set RADICAL_PILOT_DBURL)") + + # keep a dburl w/o password for logging purposes + dburl_save = ru.Url(self._dburl) + if dburl_save.get_password(): + dburl_save.set_password('***') + + self._cfg.dburl_save = str(dburl_save) + + if self._uid: + + # make sure that cfg is consistent with specified uid + if self._uid != self._cfg.get('sid', self._uid): + raise ValueError('session ID incompatible with passed config') else: - for k in ['sid', 'base', 'path']: - assert k in self._cfg, 'non-primary session misses %s' % k + + self._uid = self._cfg.get('sid') + + # if we did not find an sid in the cfg we need to generate a new ID + if not self._uid: + + # only primary sessions create UIDs + if not self._primary: + raise ValueError('non-primary sessions need a UID') + + self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE) + + if self._reg_addr: + raise ValueError('primary sessions do not accept registry address') + + + # make sure the cfg has the sid set + self._cfg['sid'] = self._uid + + # we have a minimal config and uid - initialize base class + rs.Session.__init__(self, uid=self._uid) + + + pwd = os.getcwd() + + if not self._cfg.base: + self._cfg.base = pwd + + if not self._cfg.path: + self._cfg.path = '%s/%s' % (self._cfg.base, self._cfg.sid) + + if 
not self._cfg.client_sandbox:
+            self._cfg.client_sandbox = pwd
+
 
         # change RU defaults to point logfiles etc. to the session sandbox
         def_cfg             = ru.DefaultConfig()
@@ -145,32 +223,18 @@ def __init__(self, dburl=None, uid=None, cfg=None, _primary=True,
         def_cfg.report_dir  = self._cfg.path
         def_cfg.profile_dir = self._cfg.path
 
-        self._uid = self._cfg.sid
-
-        # cfg setup is complete
-        if not self._cfg.reg_addr:
+        # primary sessions always create a Registry instance
 
-            self._reg_service = ru.zmq.Registry(uid=self._uid + '.reg',
-                                                path=self.path)
-            self._reg_service.start()
+        self._reg_service = ru.zmq.Registry(uid=self._uid + '.reg',
+                                            path=self.path)
+        self._reg_service.start()
 
-            self._cfg.reg_addr = self._reg_service.addr
+        self._cfg.reg_addr = self._reg_service.addr
 
         # always create a registry client
         assert self._cfg.reg_addr
         self._reg = ru.zmq.RegistryClient(url=self._cfg.reg_addr,
                                           pwd=self._uid)
 
-        # store some session and resource config data in the registry
-        self._reg['cfg'] = {'sid'           : self._uid,
-                            'base'          : self._cfg.base,
-                            'path'          : self._cfg.path,
-                            'dburl'         : self._cfg.dburl,
-                            'reg_addr'      : self._cfg.reg_addr,
-                            'client_sandbox': self._cfg.client_sandbox,
-                            'heartbeat'     : self._cfg.heartbeat,
-                           }
-
-
         self._prof = self._get_profiler(name=self._uid)
         self._rep  = self._get_reporter(name=self._uid)
         self._log  = self._get_logger  (name=self._uid)
@@ -180,65 +244,56 @@ def __init__(self, dburl=None, uid=None, cfg=None, _primary=True,
         self._log.info('radical.saga  version: %s', rs.version_detail)
         self._log.info('radical.utils version: %s', ru.version_detail)
 
-        self._prof.prof('session_start', uid=self._uid, msg=int(_primary))
+        self._prof.prof('session_start', uid=self._uid)
+        self._rep.info ('<>err\n")
-            self._log.exception('session create failed [%s]', dburl_no_passwd)
-            raise RuntimeError ('session create failed [%s]' %
-                                dburl_no_passwd) from e
+            self._log.exception('session create failed [%s]', dburl_save)
+            raise RuntimeError('session create failed [%s]' % dburl_save) from e
 
         # primary sessions have a component manager which also manages
         # heartbeat. 'self._cmgr.close()` should be called during termination
@@ -261,6 +316,36 @@ def _initialize_primary(self, dburl):
 
         self._rep.ok('>>ok\n')
 
 
+    # --------------------------------------------------------------------------
+    #
+    def _initialize_non_primary(self):
+
+        # a secondary session needs two pieces of information: the registry
+        # address to connect to, and the session ID for which we need to pull
+        # the config
+        if not self._uid:
+            raise ValueError('non-primary sessions need a session ID')
+
+        if not self._reg_addr:
+            raise ValueError('non-primary sessions need a registry address')
+
+        # create a registry client
+        self._reg = ru.zmq.RegistryClient(url=self._reg_addr, pwd=self._uid)
+
+        self._prof = self._get_profiler(name=self._uid)
+        self._rep  = self._get_reporter(name=self._uid)
+        self._log  = self._get_logger  (name=self._uid)
+
+        # load cfg and resource configs from the registry
+        self._cfg   = ru.Config(from_dict=self._reg['cfg'])
+        self._rcfgs = ru.Config(from_dict=self._reg['rcfgs'])
+
+        self._dburl = self._cfg.dburl
+
+        # we have a minimal config and uid - initialize base class
+        rs.Session.__init__(self, uid=self._uid)
+
+
     # --------------------------------------------------------------------------
    # context manager `with` clause
    #
@@ -276,15 +361,17 @@ def __exit__(self, exc_type, exc_value, traceback):
     def close(self, **kwargs):
         """Close the session.
 
-        All subsequent attempts access objects attached to
-        the session will result in an error. 
If cleanup is set to True,
-        the session data is removed from the database.
+        All subsequent attempts to access objects attached to the session
+        will result in an error. If cleanup is set to True, the session data
+        is removed from the database.
 
         Arguments:
-            cleanup (bool, optional): Remove session from MongoDB (implies *terminate=True*)
-            terminate (bool, optional): Shut down all pilots associated with the session.
-            download (bool, optional): Fetch pilot profiles and database entries.
-
+            cleanup (bool, optional): Remove session from MongoDB (implies
+                *terminate=True*)
+            terminate (bool, optional): Shut down all pilots associated with the
+                session.
+            download (bool, optional): Fetch pilot profiles and database
+                entries.
         """
 
         # close only once
@@ -1069,40 +1156,4 @@ def excuse():
     print('----------------------------------------------------')
 
 
-# ------------------------------------------------------------------------------
-#
-class _CloseOptions(ru.TypedDict):
-    """Options and validation for Session.close().
-
-    Arguments:
-        cleanup (bool, optional): Remove session from MongoDB.
-            Implies *terminate=True*. (default False)
-        download (bool, optional): Fetch pilot profiles and database entries.
-            (Default False.)
-        terminate (bool, optional): Shut down all pilots associated with the
-            session. (Default True.)
-
-    """
-
-    _schema = {
-        'cleanup'  : bool,
-        'download' : bool,
-        'terminate': bool
-    }
-
-    _defaults = {
-        'cleanup'  : False,
-        'download' : False,
-        'terminate': True
-    }
-
-
-    # --------------------------------------------------------------------------
-    #
-    def _verify(self):
-
-        if self.get('cleanup') and not self.get('terminate'):
-            self.terminate = True
-
-
 # ------------------------------------------------------------------------------
diff --git a/src/radical/pilot/staging_directives.py b/src/radical/pilot/staging_directives.py
index 878d53b4b2..fecae1ae05 100644
--- a/src/radical/pilot/staging_directives.py
+++ b/src/radical/pilot/staging_directives.py
@@ -215,7 +215,11 @@ def complete_url(path : str,
 
     # we expect hostname elements to be absent for schemas we expand
     if purl.host:
-        raise ValueError('URLs cannot specify `host` for expanded schemas')
+        try:
+            raise ValueError('URLs cannot specify `host` for expanded schemas')
+        except:
+            log.exception('purl host: %s' % str(purl))
+            raise
 
     if purl.schema == 'file':
         # we leave `file://` URLs unaltered
diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py
index b0e058c461..cbdc27d0e5 100644
--- a/src/radical/pilot/utils/component.py
+++ b/src/radical/pilot/utils/component.py
@@ -169,6 +169,9 @@ def uid(self):
     #
     def start_bridges(self, bridges):
 
+        if 'bridges' not in self._reg:
+            self._reg['bridges'] = dict()
+
         self._prof.prof('start_bridges_start', uid=self._uid)
 
         timeout = self._cfg.heartbeat.timeout
@@ -211,6 +214,9 @@ def start_bridges(self, bridges):
     #
     def start_components(self, components, cfg = None):
 
+        if 'components' not in self._reg:
+            self._reg['components'] = dict()
+
         self._prof.prof('start_components_start', uid=self._uid)
 
         timeout = self._cfg.heartbeat.timeout
@@ -403,24 +409,21 @@ def __init__(self, cfg, session):
        # to create it's own set of locks in self.initialize. 
self._cfg = cfg - self._uid = cfg.uid - self._sid = cfg.sid + self._uid = self._cfg.uid + self._sid = self._cfg.sid self._session = session # we always need an UID assert self._uid, 'Component needs a uid (%s)' % type(self) # state we carry over the fork - self._debug = cfg.get('debug') - self._owner = cfg.get('owner', self.uid) - self._ctype = "%s.%s" % (self.__class__.__module__, - self.__class__.__name__) - self._number = cfg.get('number', 0) - self._name = cfg.get('name.%s' % self._number, - '%s.%s' % (self._ctype, self._number)) - - self._reg = ru.zmq.RegistryClient(url=self._cfg.reg_addr, - pwd=self._sid) + self._debug = self._cfg.get('debug') + self._owner = self._cfg.get('owner', self.uid) + self._ctype = "%s.%s" % (self.__class__.__module__, + self.__class__.__name__) + + self._reg = ru.zmq.RegistryClient(url=self._cfg.reg_addr, + pwd=self._sid) self._inputs = dict() # queues to get things from self._outputs = dict() # queues to send things to From 39ed925952da41b863c6db71bcb73ae9c2705f2e Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sat, 13 May 2023 21:03:04 +0200 Subject: [PATCH 045/171] reorder agent startup - reorder config steps - fix examples - let raptor use registry --- tests/component_tests/test_session.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/component_tests/test_session.py b/tests/component_tests/test_session.py index 66430104e2..3751439285 100755 --- a/tests/component_tests/test_session.py +++ b/tests/component_tests/test_session.py @@ -20,10 +20,21 @@ class TestSession(TestCase): _cleanup_files = [] + + def se_init(self): + + self._rep = mock.Mock() + self._reg = mock.Mock() + self._log = mock.Mock() + self._prof = mock.Mock() + self._rcfgs = ru.Config('radical.pilot.resource', name='*', expand=False) + + # -------------------------------------------------------------------------- # @classmethod - @mock.patch.object(Session, '_initialize_primary', return_value=None) + @mock.patch.object(Session, '_initialize_primary', side_effect=se_init, + autospec=True) @mock.patch.object(Session, '_get_logger') @mock.patch.object(Session, '_get_profiler') @mock.patch.object(Session, '_get_reporter') @@ -38,6 +49,8 @@ def setUpClass(cls, *args, **kwargs) -> None: def tearDownClass(cls) -> None: for p in cls._cleanup_files: + if not p: + continue for f in glob.glob(p): if os.path.isdir(f): try: @@ -85,7 +98,8 @@ def test_get_resource_config(self): # -------------------------------------------------------------------------- # - @mock.patch.object(Session, '_initialize_primary', return_value=None) + @mock.patch.object(Session, '_initialize_primary', side_effect=se_init, + autospec=True) @mock.patch.object(Session, '_get_logger') @mock.patch.object(Session, '_get_profiler') @mock.patch.object(Session, '_get_reporter') From c35a921ce8cd3a3950e1384124b1cc7fdeb952f9 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sat, 13 May 2023 21:50:23 +0200 Subject: [PATCH 046/171] fix some tests --- tests/component_tests/test_session.py | 13 ++++- tests/test_raptor/test_raptor.py | 68 +++++++++++++++++++-------- 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/tests/component_tests/test_session.py b/tests/component_tests/test_session.py index 3751439285..b9e052a033 100755 --- a/tests/component_tests/test_session.py +++ b/tests/component_tests/test_session.py @@ -27,8 +27,18 @@ def se_init(self): self._reg = mock.Mock() self._log = mock.Mock() self._prof = mock.Mock() - self._rcfgs = ru.Config('radical.pilot.resource', name='*', 
expand=False) + self._rcfgs = ru.Config('radical.pilot.resource', name='*', + expand=False) + + for site in self._rcfgs: + for rcfg in self._rcfgs[site].values(): + for schema in rcfg.get('schemas', []): + while isinstance(rcfg.get(schema), str): + tgt = rcfg[schema] + rcfg[schema] = rcfg[tgt] + + print('====', self._rcfgs.keys()) # -------------------------------------------------------------------------- # @@ -202,6 +212,7 @@ def test_get_resource_sandbox(self): self._session._cache['resource_sandbox'] = {} # NCSA: split `project` by "-" + print('====', self._session._rcfgs.keys()) pilot['description'].update({'resource': 'ncsa.delta', 'project' : 'bbka-delta-cpu'}) self.assertIn('/bbka/', diff --git a/tests/test_raptor/test_raptor.py b/tests/test_raptor/test_raptor.py index 104b02a6d5..035ad55e0b 100755 --- a/tests/test_raptor/test_raptor.py +++ b/tests/test_raptor/test_raptor.py @@ -19,20 +19,52 @@ class TestWorker(TestCase): def read_json_side_effect(self, fname=None): return {'sub': '', 'pub': '', 'cores_per_rank': 8, 'gpus_per_rank': 2} + def dict_merge_side_effect(self, fname=None): + return {'sub': '', 'pub': '', 'cores_per_rank': 8, 'gpus_per_rank': 2} + + class MyConfig(ru.TypedDict): + def __init__(self, cfg=None, from_dict=None): + if cfg: super().__init__(from_dict=cfg) + else : super().__init__(from_dict=from_dict) + + class MyRegistry(ru.TypedDict): + + def __init__(self, url, pwd): + data = { + 'cfg': {}, + 'bridges.state_pubsub': { + 'sub': 'tcp://localhost:10000', + 'pub': 'tcp://localhost:10001' + }, + 'bridges.control_pubsub': { + 'sub': 'tcp://localhost:10000', + 'pub': 'tcp://localhost:10001' + }, + 'raptor.task.000000.cfg': { + 'cores_per_rank': 8, + 'gpus_per_rank' : 2 + } + } + + super().__init__(from_dict=data) + + + @mock.patch('radical.utils.zmq.RegistryClient', MyRegistry) @mock.patch('radical.utils.zmq.Subscriber') @mock.patch('radical.utils.zmq.Publisher') @mock.patch('radical.utils.zmq.Putter') @mock.patch('radical.utils.read_json', side_effect=read_json_side_effect) + @mock.patch('radical.utils.Config', MyConfig) @mock.patch('threading.Event') @mock.patch('threading.Thread') - def test_alloc(self, mock_1, mock_2, mock_3, mock_4, mock_5, mock_6): + def test_alloc(self, *args): - cfg = ru.Config(cfg={'uid' : 'worker.0000', - 'sid' : str(time.time()), - 'info' : {}, - 'cores_per_rank': 8, - 'gpus_per_rank' : 2}) + cfg = ru.Config(from_dict={'uid' : 'worker.0000', + 'sid' : str(time.time()), + 'info' : {}, + 'cores_per_rank': 8, + 'gpus_per_rank' : 2}) ru.zmq.Subscriber = mock.Mock() ru.zmq.Publisher = mock.Mock() @@ -44,21 +76,19 @@ def test_alloc(self, mock_1, mock_2, mock_3, mock_4, mock_5, mock_6): ru.zmq.Putter = mock.Mock() ru.zmq.Getter = mock.Mock() - rp.raptor.Worker.publish = mock.Mock() - rp.raptor.Worker._ts_addr = 'tcp://localhost:1' - rp.raptor.Worker._res_addr_put = 'tcp://localhost:2' - rp.raptor.Worker._req_addr_get = 'tcp://localhost:3' - - os.environ['RP_TASK_ID'] = 'task.000000' - os.environ['RP_TASK_SANDBOX'] = '/tmp' - os.environ['RP_PILOT_SANDBOX'] = '/tmp' - os.environ['RP_RANKS'] = str(8) + rp.raptor.Worker.publish = mock.Mock() + rp.raptor.Worker._ts_addr = 'tcp://localhost:1' + rp.raptor.Worker._res_addr_put = 'tcp://localhost:2' + rp.raptor.Worker._req_addr_get = 'tcp://localhost:3' - with ru.ru_open('/tmp/control_pubsub.cfg', 'w') as fout: - fout.write('{"sub": "tcp://localhost:10000", ' - ' "pub": "tcp://localhost:10001"}\n') + os.environ['RP_TASK_ID'] = 'task.000000' + os.environ['RP_TASK_SANDBOX'] = '/tmp' + 
os.environ['RP_PILOT_SANDBOX'] = '/tmp' + os.environ['RP_RANKS'] = str(8) + os.environ['RP_SESSION_ID'] = 'foo' + os.environ['RP_REGISTRY_ADDRESS'] = 'tcp://localhost:10001' - worker = rp.raptor.DefaultWorker(cfg) + worker = rp.raptor.DefaultWorker('master.0000') task_1 = {'uid': 'task.0000', 'cores': 1, 'gpus' : 1} task_2 = {'uid': 'task.0001', 'cores': 2, 'gpus' : 1} From 935b509f927227695fd7295503c8985167b00804 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sat, 13 May 2023 22:21:17 +0200 Subject: [PATCH 047/171] tests --- tests/test_raptor/test_raptor.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_raptor/test_raptor.py b/tests/test_raptor/test_raptor.py index 035ad55e0b..5a22b70254 100755 --- a/tests/test_raptor/test_raptor.py +++ b/tests/test_raptor/test_raptor.py @@ -129,8 +129,6 @@ def test_alloc(self, *args): self.assertEqual(worker._resources['cores'], [0, 0, 0, 0, 0, 0, 0, 0]) self.assertEqual(worker._resources['gpus' ], [0, 0]) - os.unlink('/tmp/control_pubsub.cfg') - # ------------------------------------------------------------------------------ # From ddbdb5e349ad2f750bf27c48c2be8607ddf93223 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Thu, 18 May 2023 12:28:36 +0200 Subject: [PATCH 048/171] snapshot --- src/radical/pilot/session.py | 187 ++++++++++++++++++++++------------- 1 file changed, 120 insertions(+), 67 deletions(-) diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index db8b79466b..3ba4b91824 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -190,10 +190,31 @@ def __init__(self, proxy_url : Optional[str ] = None, self._tmgrs = dict() # map IDs to tmgr instances self._cmgr = None # only primary sessions have a cmgr - self._init_uid() - self._init_registry() - self._init_cfg() - self._init_proxy(proxy_url, proxy_host) + + if self._role == self._PRIMARY: + + # if user did not set a uid, we need to generate a new ID + if not self._uid: + self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE) + + self._init_primary() + + + elif self._role == self._AGENT_0: + + if self._uid: + raise ValueError('non-primary sessions need a UID') + + self._init_agent_0() + + + elif self._role in [self._AGENT_N, self._DEFAULT]: + + if self._uid: + raise ValueError('non-primary sessions need a UID') + + self._init_secondary() + # now we have config and uid - initialize base class (saga session) rs.Session.__init__(self, uid=self._uid) @@ -220,61 +241,111 @@ def __init__(self, proxy_url : Optional[str ] = None, # -------------------------------------------------------------------------- # - def _init_uid(self): + def _init_primary(self): + + # The primary session + # - reads session config files + # - reads resource config files + # - starts the client side registry service + # - pushes the configs into that registry + # - pushes bridge and component configs into that registry + # - starts a ZMQ proxy (or ensures one is up and running) + + # we still call `_init_cfg` to complete missing config settings + # FIXME: completion only needed by `PRIMARY` + self._read_cfg() + + # primary sessions create a registry service + self._start_registry() + self._init_registry() - if self._role == self._PRIMARY: + # store the session config in the new registry + self._reg.put('sid', self._uid) - # only primary sessions create UIDs - if self._uid: - raise ValueError('only non-primary sessions need a UID') + # only primary sessions and agent_0 connect to the ZMQ proxy + self._init_proxy() - # if user did not set a uid, we need 
to generate a new ID - self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE) - else: + # -------------------------------------------------------------------------- + # + def _init_agent_0(self): - if not self._uid: - raise ValueError('non-primary sessions need a UID') + # The agent_0 session expects the `cfg` parameter to contain the + # complete agent config! + # + # - starts the agent side registry service + # - separates + # - session config (== agent config) + # - bridge configs + # - component configs + # - resource config + # - pushes them all into the registry + # - connects to the ZMQ proxy for client/agent communication + + self._start_registry() + self._init_registry() + self._init_cfg() # -------------------------------------------------------------------------- # - def _init_registry(self): - ''' - PRIMARY: create registry client and service - AGENT : create registry client and service - DEFAULT: create registry client only - ''' + def _init_secondary(self): - if self._role == self._PRIMARY and self._reg_addr: - raise ValueError('primary sessions do not accept registry address') + pass - # primary and agent_0 sessions run an inline registry service to share - # runtime config with their components (and sub-agents) - if self._role in [self._PRIMARY, self._AGENT_0]: + # -------------------------------------------------------------------------- + # + def _start_registry(self): - reg_service = ru.zmq.Registry(uid='%s.reg' % self._uid) - reg_service.start() + # make sure that no other registry is used + if self._reg_addr: + raise ValueError('cannot start registry when providing `reg_addr`') - self._reg_addr = reg_service.addr + self._reg_service = ru.zmq.Registry(uid='%s.reg' % self._uid) + self._reg_service.start() - assert self._reg_addr + self._reg_addr = self._reg_service.addr - # register the session ID as sanity check for non-primary sessions - self._reg = ru.zmq.RegistryClient(url=self._reg_addr) - self._reg.put('sid', self._uid) - else: + # -------------------------------------------------------------------------- + # + def _init_registry(self): - # non-primary sessions also connect a registry client - assert self._reg_addr - self._reg = ru.zmq.RegistryClient(url=self._reg_addr) + if not self._reg_addr: + raise ValueError('session needs a registry address') - # ensure the registry is up and valid - assert self._reg.get('sid') == self._uid + # register the session ID as sanity check for non-primary sessions + self._reg = ru.zmq.RegistryClient(url=self._reg_addr) + # -------------------------------------------------------------------------- + # + def _read_cfg(self): + + # NOTE: the `cfg` parameter to the c'tor is overloaded: it can be + # a config name (str) or a config dict to be merged into the + # default config. 
+ cfg_name = 'default' + if isinstance(self._cfg, str): + cfg_name = self._cfg + self._cfg = None + + # load the named config, merge provided config + self._cfg = ru.Config('radical.pilot.session', name=cfg_name, + cfg=self._cfg) + + self._rcfgs = ru.Config('radical.pilot.resource', name='*', + expand=False) + + # expand recfgs for all schema options + # FIXME: this is ugly + for site in self._rcfgs: + for rcfg in self._rcfgs[site].values(): + for schema in rcfg.get('schemas', []): + while isinstance(rcfg.get(schema), str): + tgt = rcfg[schema] + rcfg[schema] = rcfg[tgt] # -------------------------------------------------------------------------- # def _init_cfg(self): @@ -292,23 +363,6 @@ def _init_cfg(self): # a predefined config and amend it at the same time. This might # be ok for the session, but introduces an API inconsistency. - if self._role == self._PRIMARY: - - cfg_name = 'default' - if isinstance(self._cfg, str): - cfg_name = self._cfg - self._cfg = None - - self._cfg = ru.Config('radical.pilot.session', name=cfg_name, cfg=self._cfg) - self._rcfgs = ru.Config('radical.pilot.resource', name='*', expand=False) - - for site in self._rcfgs: - for rcfg in self._rcfgs[site].values(): - for schema in rcfg.get('schemas', []): - while isinstance(rcfg.get(schema), str): - tgt = rcfg[schema] - rcfg[schema] = rcfg[tgt] - # make sure the cfg has the sid set self._cfg['sid'] = self._uid @@ -399,15 +453,14 @@ def _init_cfg(self): # -------------------------------------------------------------------------- # - def _init_proxy(self, proxy_url, proxy_host): + def _init_proxy(self): - self._log.debug('=== 4 %s', self._role) # need a proxy_url to connect to - get from arg or config (default cfg # pulls this from env) - if not proxy_url: - proxy_url = self._cfg.proxy_url + if not self._proxy_url: + self._proxy_url = self._cfg.proxy_url - if not proxy_url: + if not self._proxy_url: if self._role in [self._AGENT_0, self._AGENT_N, self._DEFAULT]: raise RuntimeError('proxy service URL missing') @@ -415,16 +468,16 @@ def _init_proxy(self, proxy_url, proxy_host): # start a temporary embedded service on the proxy host # (defaults to localhost on the default cfg) - if not proxy_host: - proxy_host = self._cfg.proxy_host + if not self._proxy_host: + self._proxy_host = self._cfg.proxy_host # NOTE: we assume ssh connectivity to the proxy host - but in fact # do allow proxy_host to be a full saga job service URL - if '://' in proxy_host: - proxy_host_url = ru.Url(proxy_host) + if '://' in self._proxy_host: + proxy_host_url = ru.Url(self._proxy_host) else: proxy_host_url = ru.Url() - proxy_host_url.set_host(proxy_host) + proxy_host_url.set_host(self._proxy_host) self._proxy_addr = None self._proxy_event = mt.Event() @@ -442,9 +495,9 @@ def _init_proxy(self, proxy_url, proxy_host): self._log.debug('=== 5 %s', self._role) if self._role == self._PRIMARY: self._rep.info ('< Date: Wed, 24 May 2023 00:34:37 +0200 Subject: [PATCH 049/171] response to comments, add missing file --- bin/radical-pilot-agent | 6 +++++- {docs/architecture => concepts}/registry.txt | 2 ++ {docs/architecture => concepts}/zmq/server.py | 0 src/radical/pilot/agent/agent_0.py | 10 +++++----- src/radical/pilot/utils/component.py | 3 +-- 5 files changed, 13 insertions(+), 8 deletions(-) rename {docs/architecture => concepts}/registry.txt (93%) rename {docs/architecture => concepts}/zmq/server.py (100%) diff --git a/bin/radical-pilot-agent b/bin/radical-pilot-agent index 3952af543b..f1f7b187b8 100755 --- a/bin/radical-pilot-agent +++ 
b/bin/radical-pilot-agent @@ -34,7 +34,8 @@ def bootstrap_3(aid): print("bootstrap agent %s" % aid) - agent = None + agent = None + reg_service = None try: setproctitle.setproctitle('rp.%s' % aid) @@ -73,6 +74,9 @@ def bootstrap_3(aid): # (essentially) main... print('finalize %s' % aid) + if reg_service: + reg_service.stop() + if agent: agent.stop() print('stopped %s' % aid) diff --git a/docs/architecture/registry.txt b/concepts/registry.txt similarity index 93% rename from docs/architecture/registry.txt rename to concepts/registry.txt index c4eee93175..e7a8aed582 100644 --- a/docs/architecture/registry.txt +++ b/concepts/registry.txt @@ -7,6 +7,8 @@ registry = { 'description': { ... }, 'rm_info' : { ... }, 'agent.0' : { '' } + 'cfg' : { ... }, + 'rcfg' : { ... } } } } diff --git a/docs/architecture/zmq/server.py b/concepts/zmq/server.py similarity index 100% rename from docs/architecture/zmq/server.py rename to concepts/zmq/server.py diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 1452e6f2d9..99e0f702af 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -193,9 +193,9 @@ def _configure_app_comm(self): 'stall_hwm': 1, 'log_level': 'error'} for ac in app_comm} for ac in app_comm: - if ac in self._cfg['bridges']: + if ac in self._reg['bridges']: raise ValueError('reserved app_comm name %s' % ac) - self._cfg['bridges'][ac] = app_comm[ac] + self._reg['bridges.%s' % ac] = app_comm[ac] # some of the bridge addresses also need to be exposed to the workload @@ -203,12 +203,12 @@ def _configure_app_comm(self): if 'task_environment' not in self._cfg: self._cfg['task_environment'] = dict() for ac in app_comm: - if ac not in self._cfg['bridges']: + if ac not in self._reg['bridges']: raise RuntimeError('missing app_comm %s' % ac) self._cfg['task_environment']['RP_%s_IN' % ac.upper()] = \ - self._cfg['bridges'][ac]['addr_in'] + self._reg['bridges.%s.ac' % ac]['addr_in'] self._cfg['task_environment']['RP_%s_OUT' % ac.upper()] = \ - self._cfg['bridges'][ac]['addr_out'] + self._reg['bridges.%s.addr_out' % ac] # -------------------------------------------------------------------------- diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index 63e488f0ea..e596b77311 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -421,8 +421,7 @@ def __init__(self, cfg, session): self._ctype = "%s.%s" % (self.__class__.__module__, self.__class__.__name__) - self._reg = ru.zmq.RegistryClient(url=self._cfg.reg_addr, - pwd=self._sid) + self._reg = self._session._reg self._inputs = dict() # queues to get things from self._outputs = dict() # queues to send things to From 9fde14c8a85f8a6df5ca0d7f3e9ec9e5ddd766d9 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 24 May 2023 12:51:43 +0200 Subject: [PATCH 050/171] snap --- bin/radical-pilot-component | 3 +- .../pilot/configs/agent_default_sa.json | 5 - .../pilot/configs/agent_summit_sa.json | 5 +- .../pilot/configs/session_default.json | 5 +- src/radical/pilot/proxy.py | 14 +- src/radical/pilot/session.py | 346 +++++++++--------- src/radical/pilot/utils/component.py | 2 +- 7 files changed, 177 insertions(+), 203 deletions(-) diff --git a/bin/radical-pilot-component b/bin/radical-pilot-component index 7c8e0bc546..2e91dc762e 100755 --- a/bin/radical-pilot-component +++ b/bin/radical-pilot-component @@ -80,7 +80,8 @@ def wrapped_main(sid, reg_addr, uid, log, prof): spt.setproctitle('rp.%s' % uid) # start a 
non-primary session - session = rp.Session(cfg=ccfg, _role=rp.Session._DEFAULT, _reg_addr=reg_addr) + session = rp.Session(uid=sid, cfg=ccfg, + _role=rp.Session._DEFAULT, _reg_addr=reg_addr) # create the component and begin to work comp = rp.utils.Component.create(ccfg, session) diff --git a/src/radical/pilot/configs/agent_default_sa.json b/src/radical/pilot/configs/agent_default_sa.json index 58541ad912..5f3e2f2c49 100644 --- a/src/radical/pilot/configs/agent_default_sa.json +++ b/src/radical/pilot/configs/agent_default_sa.json @@ -59,11 +59,6 @@ }, "components" : { - # the update worker must live in agent.0, since only that agent is - # sure to have connectivity toward the DB. - "update" : {"count" : 1 - "db_bulk_time" : 1.0, - "db_bulk_size" : 1024 }, "agent_staging_input" : {"count" : 1}, "agent_scheduling" : {"count" : 1}, # "agent_executing" : {"count" : 1}, diff --git a/src/radical/pilot/configs/agent_summit_sa.json b/src/radical/pilot/configs/agent_summit_sa.json index f78b49a1ea..e3b3ed94f5 100644 --- a/src/radical/pilot/configs/agent_summit_sa.json +++ b/src/radical/pilot/configs/agent_summit_sa.json @@ -36,13 +36,10 @@ }, "components" : { - # the update worker must live in agent.0, since only that agent is - # sure to have connectivity toward the DB. # "AgentStagingInputComponent" : {"count" : 1}, - "AgentSchedulingComponent" : {"count" : 1}, # "AgentExecutingComponent" : {"count" : 1}, # "AgentStagingOutputComponent" : {"count" : 1}, - "UpdateWorker" : {"count" : 1} + "AgentSchedulingComponent" : {"count" : 1} }, "agents": { diff --git a/src/radical/pilot/configs/session_default.json b/src/radical/pilot/configs/session_default.json index e5338fcbc3..03910c3ee4 100644 --- a/src/radical/pilot/configs/session_default.json +++ b/src/radical/pilot/configs/session_default.json @@ -24,10 +24,7 @@ "components" : { # how many instances of the respective components should be started - "update" : { "count" : 1 , - "db_bulk_time": 1.0, - "db_bulk_size": 1024 }, - "stager" : { "count" : 1 } + "stager" : { "count": 1 } } } diff --git a/src/radical/pilot/proxy.py b/src/radical/pilot/proxy.py index e2a7a67ce7..97630db8a0 100644 --- a/src/radical/pilot/proxy.py +++ b/src/radical/pilot/proxy.py @@ -214,17 +214,17 @@ def _register(self, arg): proc.start() try: - data = q.get(timeout=10) + cfg = q.get(timeout=10) except queue.Empty as e: proc.terminate() raise RuntimeError('worker startup failed') from e self._clients[sid] = {'proc': proc, 'term': term, - 'data': data, + 'cfg' : cfg, 'hb' : time.time()} - return self._clients[sid]['data'] + return self._clients[sid]['cfg'] # -------------------------------------------------------------------------- @@ -260,7 +260,7 @@ def _worker(self, sid, q, term): proxy_sp.start() proxy_aq.start() - data = {'proxy_control_pubsub': {'pub': str(proxy_cp.addr_pub), + cfg = {'proxy_control_pubsub': {'pub': str(proxy_cp.addr_pub), 'sub': str(proxy_cp.addr_sub)}, 'proxy_state_pubsub' : {'pub': str(proxy_sp.addr_pub), 'sub': str(proxy_sp.addr_sub)}, @@ -268,7 +268,7 @@ def _worker(self, sid, q, term): 'get': str(proxy_aq.addr_get)}} # inform service about endpoint details - q.put(data) + q.put(cfg) # we run forever until we receive a termination command log.info('work') @@ -297,7 +297,7 @@ def _lookup(self, arg): if sid not in self._clients: raise RuntimeError('client %s not registered' % sid) - return self._clients[sid]['data'] + return self._clients[sid]['cfg'] # -------------------------------------------------------------------------- @@ -327,7 +327,7 @@ def 
_heartbeat(self, arg):
 
         with self._lock:
 
             if sid not in self._clients:
-                raise RuntimeError('client %s not registered' % sid)
+                raise RuntimeError('client %s not registered' % sid)
 
             self._clients[sid]['hb'] = now
 
diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py
index 3ba4b91824..1414541743 100644
--- a/src/radical/pilot/session.py
+++ b/src/radical/pilot/session.py
@@ -94,12 +94,11 @@ class Session(rs.Session):
 
     # --------------------------------------------------------------------------
     #
-    def __init__(self, proxy_url  : Optional[str ] = None,
-                       proxy_host : Optional[str ] = None,
-                       uid        : Optional[str ] = None,
-                       cfg        : Optional[dict] = None,
-                       _role      : Optional[str ] = _PRIMARY,
-                       _reg_addr  : Optional[str ] = None,
+    def __init__(self, proxy_url: Optional[str ] = None,
+                       uid      : Optional[str ] = None,
+                       cfg      : Optional[dict] = None,
+                       _role    : Optional[str ] = _PRIMARY,
+                       _reg_addr: Optional[str ] = None,
                        **close_options):
         """Create a new session.
 
@@ -112,12 +111,9 @@ def __init__(self, proxy_url : Optional[str ] = None,
 
         A `proxy_url` can be specified which then must point to an RP Proxy
         Service instance which this session can use to establish a communication
-        proxy. Alternatively, a `proxy_host` can be specified - the session will
-        then attempt to start a proxy service on that host. If neither
-        `proxy_url` nor `proxy_host` are specified, the session will check for
-        the environment variables `RADICAL_PILOT_PROXY_URL` and
-        `RADICAL_PILOT_PROXY_HOST` (in that order) and will interpret them as
-        above. If none of these information is available, the session will
+        proxy. If `proxy_url` is not specified, the session will check for the
+        environment variable `RADICAL_PILOT_PROXY_URL` and will interpret it
+        as above. If that information is not available, the session will
         instantiate a proxy service on the local host. Note that any proxy
         service instantiated by the session itself will be terminated once the
         session instance is closed or goes out of scope and is thus garbage
@@ -135,12 +131,6 @@ def __init__(self, proxy_url : Optional[str ] = None,
             proxy service which is used to establish an RP communication
             proxy for this session.
 
-        proxy_host (str, optional): proxy host - alternative to the
-            `proxy_url`, the application can specify a host name on which
-            a temporary proxy is started by the session. This default to
-            `localhost` (but see remarks above about the interpretation of
-            environment variables).
-
         uid (str, optional): Create a session with this UID.  Session UIDs
             MUST be unique - otherwise they will lead to communication
             conflicts, resulting in undefined behaviours.
@@ -178,7 +168,6 @@ def __init__(self, proxy_url : Optional[str ] = None,
         self._cfg            = ru.Config(cfg=cfg)
         self._reg_addr       = _reg_addr
         self._proxy_url      = proxy_url
-        self._proxy_host     = proxy_host
         self._closed         = False
         self._created        = time.time()
         self._close_options  = _CloseOptions(close_options)
@@ -190,7 +179,12 @@ def __init__(self, proxy_url: Optional[str ] = None,
         self._tmgrs      = dict()             # map IDs to tmgr instances
         self._cmgr       = None               # only primary sessions have a cmgr
 
+        # non-primary sessions need a uid! 
+ if self._role != self._PRIMARY and not self._uid: + raise ValueError('non-primary session needs UID (%s)' % self._role) + # initialization is different for each session type + # NOTE: we could refactor this to session sub-classes if self._role == self._PRIMARY: # if user did not set a uid, we need to generate a new ID @@ -202,16 +196,15 @@ def __init__(self, proxy_url : Optional[str ] = None, elif self._role == self._AGENT_0: - if self._uid: - raise ValueError('non-primary sessions need a UID') - self._init_agent_0() - elif self._role in [self._AGENT_N, self._DEFAULT]: + elif self._role == self._AGENT_N: + + self._init_agent_n() - if self._uid: - raise ValueError('non-primary sessions need a UID') + + else: self._init_secondary() @@ -219,9 +212,6 @@ def __init__(self, proxy_url : Optional[str ] = None, # now we have config and uid - initialize base class (saga session) rs.Session.__init__(self, uid=self._uid) - # start bridges and components - self._init_components() - # cache sandboxes etc. self._cache_lock = ru.RLock() self._cache = {'endpoint_fs' : dict(), @@ -253,17 +243,31 @@ def _init_primary(self): # we still call `_init_cfg` to complete missing config settings # FIXME: completion only needed by `PRIMARY` - self._read_cfg() + self._init_cfg_from_scratch() # primary sessions create a registry service self._start_registry() - self._init_registry() + self._connect_registry() + + # cfg and reg setup is complete - push cfg to the registry - but push + # bridges, components and heartbeat subsections separately + flat_cfg = copy.deepcopy(self._cfg) + del flat_cfg['bridges'] + del flat_cfg['components'] + del flat_cfg['heartbeat'] - # store the session config in the new registry - self._reg.put('sid', self._uid) + self._reg['cfg'] = flat_cfg + self._reg['bridges'] = self._cfg.bridges + self._reg['components'] = self._cfg.components + self._reg['heartbeat'] = self._cfg.heartbeat + self._reg['rcfgs'] = self._rcfgs + self._reg.dump('first') - # only primary sessions and agent_0 connect to the ZMQ proxy - self._init_proxy() + # only primary sessions start and initialize the proxy service + self._start_proxy() + + # start bridges and components + self._init_components() # -------------------------------------------------------------------------- @@ -281,17 +285,40 @@ def _init_agent_0(self): # - resource config # - pushes them all into the registry # - connects to the ZMQ proxy for client/agent communication + # - start agent components self._start_registry() - self._init_registry() - self._init_cfg() + self._connect_registry() + self._init_cfg_from_registry() + self._connect_proxy() + self._init_components() + + + # -------------------------------------------------------------------------- + # + def _init_agent_n(self): + + # The agent_n session fetch their config from agent_0 registry + # + # - connect to registry + # - fetch config from registry + # - start agent components + + self._connect_registry() + self._init_cfg_from_registry() + self._init_components() # -------------------------------------------------------------------------- # def _init_secondary(self): - pass + # sub-agents and components connect to an existing registry (owned by + # the `primary` session or `agent_0`) and load config settings from + # there. 
+ + self._connect_registry() + self._init_cfg_from_registry() # -------------------------------------------------------------------------- @@ -302,30 +329,37 @@ def _start_registry(self): if self._reg_addr: raise ValueError('cannot start registry when providing `reg_addr`') - self._reg_service = ru.zmq.Registry(uid='%s.reg' % self._uid) + self._reg_service = ru.zmq.Registry(uid='%s.reg' % self._uid, + path=self._cfg.path) self._reg_service.start() - self._reg_addr = self._reg_service.addr - + self._cfg.reg_addr = self._reg_service.addr # -------------------------------------------------------------------------- # - def _init_registry(self): + def _connect_registry(self): - if not self._reg_addr: + if not self._cfg.reg_addr: raise ValueError('session needs a registry address') - # register the session ID as sanity check for non-primary sessions - self._reg = ru.zmq.RegistryClient(url=self._reg_addr) + self._reg = ru.zmq.RegistryClient(url=self._cfg.reg_addr) # -------------------------------------------------------------------------- # - def _read_cfg(self): + def _init_cfg_from_scratch(self): + + # A primary session will at this point have a registry client connected + # to its registry service. Further, self._cfg will either be a config + # name to be read from disk (`session_.json`), or a dictionary + # with a specific, user provided config. From this information clean up + # `self._cfg` and store it in the registry. Also read resource configs + # and store the in the registry as well. + + # NOTE: `cfg_name` and `cfg` are overloaded, the user cannot point to + # a predefined config and amend it at the same time. This might + # be ok for the session, but introduces an API inconsistency. - # NOTE: the `cfg` parameter to the c'tor is overloaded: it can be - # a config name (str) or a config dict to be merged into the - # default config. cfg_name = 'default' if isinstance(self._cfg, str): cfg_name = self._cfg @@ -335,10 +369,10 @@ def _read_cfg(self): self._cfg = ru.Config('radical.pilot.session', name=cfg_name, cfg=self._cfg) + # load the resource configs self._rcfgs = ru.Config('radical.pilot.resource', name='*', expand=False) - - # expand recfgs for all schema options + # expand rcfgs for all schema options # FIXME: this is ugly for site in self._rcfgs: for rcfg in self._rcfgs[site].values(): @@ -346,33 +380,10 @@ def _read_cfg(self): while isinstance(rcfg.get(schema), str): tgt = rcfg[schema] rcfg[schema] = rcfg[tgt] - # -------------------------------------------------------------------------- - # - def _init_cfg(self): - # At this point we have a UID and a valid registry client. Depending on - # session role, the session config is initialized in different ways: - # - # - PRIMARY: read from disk - # - AGENT : get cfg dict (agent config staged by client side) - # - DEFAULT: fetch from registry - # - # The same scheme holds for resource configs. - - # NOTE: `cfg_name` and `cfg` are overloaded, the user cannot point to - # a predefined config and amend it at the same time. This might - # be ok for the session, but introduces an API inconsistency. - - # make sure the cfg has the sid set + # set essential config values for *this* specific session self._cfg['sid'] = self._uid - # we have a minimal config and uid - initialize base class - rs.Session.__init__(self, uid=self._uid) - - # session path: where to store logfiles etc. 
- if self._cfg.path: self._path = self._cfg.path - else : self._path = '%s/%s' % (os.getcwd(), self._uid) - pwd = os.getcwd() if not self._cfg.base: @@ -402,149 +413,124 @@ def _init_cfg(self): self._log.info('radical.utils version: %s', ru.version_detail) self._log.debug('=== Session(%s, %s)', self._uid, self._role) - self._log.debug('\n'.join(ru.get_stacktrace())) self._prof.prof('session_start', uid=self._uid) self._rep.info ('< Date: Wed, 24 May 2023 14:27:29 +0200 Subject: [PATCH 051/171] session init --- concepts/registry.txt | 43 +++++++++++ {docs/architecture => concepts}/zmq/server.py | 0 docs/architecture/registry.txt | 44 ------------ src/radical/pilot/session.py | 71 +++++++++++++------ src/radical/pilot/utils/component.py | 12 +--- 5 files changed, 92 insertions(+), 78 deletions(-) create mode 100644 concepts/registry.txt rename {docs/architecture => concepts}/zmq/server.py (100%) delete mode 100644 docs/architecture/registry.txt diff --git a/concepts/registry.txt b/concepts/registry.txt new file mode 100644 index 0000000000..089eba8da5 --- /dev/null +++ b/concepts/registry.txt @@ -0,0 +1,43 @@ + +registry = { + 'cfg' : { ... }, # session config for this module + 'proxy' : { ... }, # proxy config for this run + 'rm_info' : { ... }, # only for agent modules + 'heartbeat' : { + 'addr_pub': ... , + 'addr_sub': ... , + 'cfg' : { + 'interval': ... , + 'timeout' : ... + } + }, + 'bridges' : { + 'abc_pubsub': { + 'addr_pub': ... , + 'addr_sub': ... , + 'cfg' : { + 'log_lvl' : ... , + 'hwm' : ... , + 'bulk_size': ... + } + }, + 'xyz_queue': { + 'addr_put': ... , + 'addr_get': ... , + 'cfg' : { + 'log_lvl' : ... , + 'hwm' : ... , + 'bulk_size': ... + } + } + }, + 'components': { + 'staging.0001': { + 'cfg': { + 'log_lvl': ... + } + } + } + 'rcfgs' : { ... } +} + diff --git a/docs/architecture/zmq/server.py b/concepts/zmq/server.py similarity index 100% rename from docs/architecture/zmq/server.py rename to concepts/zmq/server.py diff --git a/docs/architecture/registry.txt b/docs/architecture/registry.txt deleted file mode 100644 index c4eee93175..0000000000 --- a/docs/architecture/registry.txt +++ /dev/null @@ -1,44 +0,0 @@ - -registry = { - '' : { - '' - 'heartbeat' : { ... }, - 'pilot.0000': { - 'description': { ... }, - 'rm_info' : { ... }, - 'agent.0' : { '' } - } - } -} - -SUB_REGISTRY = { - 'cmgr.0000' : { - 'heartbeat' : { - 'addr_pub': ... , - 'addr_sub': ... , - 'cfg' : { - 'interval': ... , - 'timeout' : ... - } - }, - 'bridges' : { - 'state_pubsub': { - 'addr_pub': ... , - 'addr_sub': ... , - 'cfg' : { - 'log_lvl' : ..., - 'hwm' : ..., - 'bulk_size': ... - } - } - }, - 'components': { - 'staging.0001': { - 'cfg': { - 'log_lvl': ... 
- } - } - } - } -} - diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 1414541743..00d3133b13 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -249,23 +249,12 @@ def _init_primary(self): self._start_registry() self._connect_registry() - # cfg and reg setup is complete - push cfg to the registry - but push - # bridges, components and heartbeat subsections separately - flat_cfg = copy.deepcopy(self._cfg) - del flat_cfg['bridges'] - del flat_cfg['components'] - del flat_cfg['heartbeat'] - - self._reg['cfg'] = flat_cfg - self._reg['bridges'] = self._cfg.bridges - self._reg['components'] = self._cfg.components - self._reg['heartbeat'] = self._cfg.heartbeat - self._reg['rcfgs'] = self._rcfgs - self._reg.dump('first') - # only primary sessions start and initialize the proxy service self._start_proxy() + # push the session config into the registry + self._publish_cfg() + # start bridges and components self._init_components() @@ -448,6 +437,25 @@ def _init_cfg_from_registry(self): self._prof.prof('session_start', uid=self._uid) + # -------------------------------------------------------------------------- + # + def _publish_cfg(self): + + # the primary session and agent_0 push their configs into the registry + + assert self._role in [self._PRIMARY, self._AGENT_0] + + # push proxy, bridges, components and heartbeat subsections separately + flat_cfg = copy.deepcopy(self._cfg) + del flat_cfg['bridges'] + del flat_cfg['components'] + + self._reg['cfg'] = flat_cfg + self._reg['bridges'] = {} + self._reg['components'] = {} + self._reg.dump('stored') + self._reg['rcfgs'] = self._rcfgs + # -------------------------------------------------------------------------- # def _start_proxy(self): @@ -489,9 +497,9 @@ def _start_proxy(self): self._proxy = ru.zmq.Client(url=self._cfg.proxy_url) proxy_cfg = self._proxy.request('register', {'sid': self._uid}) - # push the config to the registry - self._reg.put('proxy', proxy_cfg) - self._log.debug('proxy config: %s', proxy_cfg) + self._cfg.proxy = ru.Config(cfg=proxy_cfg) + + self._log.debug('proxy config: %s', self._cfg.proxy) except: self._log.exception('%s: failed to start proxy', self._role) @@ -534,15 +542,24 @@ def _init_components(self): # make sure we send heartbeats to the proxy self._run_proxy_hb() - pwd = self._cfg.path - # forward any control messages to the proxy def fwd_control(topic, msg): self._log.debug('=== fwd control %s: %s', topic, msg) self._proxy_ctrl_pub.put(rpc.PROXY_CONTROL_PUBSUB, msg) - self._proxy_ctrl_pub = ru.zmq.Publisher(rpc.PROXY_CONTROL_PUBSUB, path=pwd) - self._ctrl_sub = ru.zmq.Subscriber(rpc.CONTROL_PUBSUB, path=pwd) + ru.write_json(fname='foo.json', data=self._reg['bridges']) + ru.write_json(fname='cfg.json', data=self._cfg) + + self._proxy_ctrl_pub = ru.zmq.Publisher( + channel=rpc.PROXY_CONTROL_PUBSUB, + url=self._cfg.proxy.proxy_control_pubsub.pub, + path=self._cfg.path) + + self._ctrl_sub = ru.zmq.Subscriber( + channel=rpc.CONTROL_PUBSUB, + url=self._reg['bridges.control_pubsub.sub'], + path=self._cfg.path) + self._ctrl_sub.subscribe(rpc.CONTROL_PUBSUB, fwd_control) # collect any state updates from the proxy @@ -550,8 +567,16 @@ def fwd_state(topic, msg): self._log.debug('=== fwd state %s: %s', topic, msg) self._state_pub.put(topic, msg) - self._state_pub = ru.zmq.Publisher(rpc.STATE_PUBSUB, path=pwd) - self._proxy_state_sub = ru.zmq.Subscriber(rpc.PROXY_STATE_PUBSUB, path=pwd) + self._state_pub = ru.zmq.Publisher( + channel=rpc.STATE_PUBSUB, + 
url=self._reg['bridges.state_pubsub.pub'], + path=self._cfg.path) + + self._proxy_state_sub = ru.zmq.Subscriber( + channel=rpc.PROXY_STATE_PUBSUB, + url=self._cfg.proxy.proxy_state_pubsub.sub, + path=self._cfg.path) + self._proxy_state_sub.subscribe(rpc.PROXY_STATE_PUBSUB, fwd_state) diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index a408f599e8..fa184edb73 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -97,8 +97,6 @@ def __init__(self, sid, reg_addr, owner): self._cfg.heartbeat = hb_cfg self._reg['cfg.heartbeat'] = hb_cfg - self._reg.dump(self._uid) - # runs a HB monitor on that channel self._hb = ru.Heartbeat(uid=self.uid, timeout=hb_cfg['timeout'], @@ -158,7 +156,7 @@ def _hb_term_cb(self, uid=None): # terminate and suicidally kill the very process it is living in. # Make sure all required cleanup is done at this point! - return None + return False # -------------------------------------------------------------------------- @@ -172,9 +170,6 @@ def uid(self): # def start_bridges(self, bridges): - if 'bridges' not in self._reg: - self._reg['bridges'] = dict() - self._prof.prof('start_bridges_start', uid=self._uid) timeout = self._cfg.heartbeat.timeout @@ -223,12 +218,7 @@ def start_bridges(self, bridges): # def start_components(self, components, cfg = None): - if 'components' not in self._reg: - self._reg['components'] = dict() - self._prof.prof('start_components_start: %s', uid=self._uid) - import pprint - self._log.debug('=== cmgr: %s', pprint.pformat(self._cfg.as_dict())) timeout = self._cfg.heartbeat.timeout From c23bcedaf62a8eaf74f5246706cad0ee6d279999 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 24 May 2023 17:32:39 +0200 Subject: [PATCH 052/171] agent starts up --- concepts/registry.txt | 1 - examples/00_getting_started.py | 2 +- src/radical/pilot/pilot_manager.py | 2 +- src/radical/pilot/pmgr/launching/base.py | 2 + src/radical/pilot/session.py | 23 ++++++---- src/radical/pilot/task_manager.py | 2 + .../pilot/tmgr/staging_input/default.py | 25 ++++++----- src/radical/pilot/utils/component.py | 43 +++++++++++-------- 8 files changed, 59 insertions(+), 41 deletions(-) diff --git a/concepts/registry.txt b/concepts/registry.txt index 089eba8da5..4ded51ca93 100644 --- a/concepts/registry.txt +++ b/concepts/registry.txt @@ -1,7 +1,6 @@ registry = { 'cfg' : { ... }, # session config for this module - 'proxy' : { ... }, # proxy config for this run 'rm_info' : { ... }, # only for agent modules 'heartbeat' : { 'addr_pub': ... , diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index 7624bbacd3..2cad3b9fa3 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -67,7 +67,7 @@ pilot = pmgr.submit_pilots(pdesc) - n = 1024 * 1024 # number of tasks to run + n = 16 # number of tasks to run report.header('submit %d tasks' % n) # Register the pilot in a TaskManager object. 
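# ------------------------------------------------------------------------------
# [editor's aside - not part of the patch series]  The changes above and below
# move config exchange from files on disk onto the ZMQ registry.  A minimal,
# runnable sketch of that round trip, using only registry calls which appear
# in this series (`start()`, `addr`, dotted-key access, `stop()`); the bridge
# name follows the code above, the endpoint values are illustrative only.

import radical.utils as ru

reg_service = ru.zmq.Registry(uid='example.reg')    # what a primary session or
reg_service.start()                                 # agent_0 starts

reg = ru.zmq.RegistryClient(url=reg_service.addr)   # what components connect to

# reads and writes resolve dotted hierarchical keys, as in `_publish_cfg` and
# the component constructors
reg['bridges.control_pubsub'] = {'sub': 'tcp://localhost:10000',
                                 'pub': 'tcp://localhost:10001'}
assert reg['bridges.control_pubsub.sub'] == 'tcp://localhost:10000'

reg_service.stop()
# ------------------------------------------------------------------------------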
diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py index 810f9bc042..e861f45fd1 100644 --- a/src/radical/pilot/pilot_manager.py +++ b/src/radical/pilot/pilot_manager.py @@ -88,7 +88,7 @@ def __init__(self, session, uid=None, cfg='default'): """ - assert session.primary, 'pmgr needs primary session' + assert session._role == session._PRIMARY, 'pmgr needs primary session' # initialize the base class (with no intent to fork) if uid: diff --git a/src/radical/pilot/pmgr/launching/base.py b/src/radical/pilot/pmgr/launching/base.py index 0353f78967..5934e0d095 100644 --- a/src/radical/pilot/pmgr/launching/base.py +++ b/src/radical/pilot/pmgr/launching/base.py @@ -273,6 +273,8 @@ def _kill_pilots(self, pids): # def work(self, pilots): + self._log.debug('==== %s', pilots) + if not isinstance(pilots, list): pilots = [pilots] diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 00d3133b13..9fda2b6dc0 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -228,6 +228,8 @@ def __init__(self, proxy_url: Optional[str ] = None, if self._role == self._PRIMARY: self._rep.ok('>>ok\n') + self._reg.dump('sinit.%s' % self._role) + # -------------------------------------------------------------------------- # @@ -401,8 +403,6 @@ def _init_cfg_from_scratch(self): self._log.info('radical.saga version: %s', rs.version_detail) self._log.info('radical.utils version: %s', ru.version_detail) - self._log.debug('=== Session(%s, %s)', self._uid, self._role) - self._prof.prof('session_start', uid=self._uid) self._rep.info ('<= self._mkdir_threshold: self._log.debug('tar %d sboxes', len(task_sboxes)) + session_sbox = self._session._get_session_sandbox(pilot) + # no matter the bulk mechanism, we need a SAGA handle to the # remote FS sbox_fs = ru.Url(session_sbox) # deep copy @@ -292,17 +294,18 @@ def work(self, tasks): self._advance_tasks(no_staging_tasks[pid], pid) to_fail = list() - for task,actionables in staging_tasks: - try: - self._handle_task(task, actionables) - self._advance_tasks([task], pid) - - except Exception as e: - # staging failed - do not pass task to agent - task['control'] = 'tmgr' - task['exception'] = repr(e) - task['exception_detail'] = '\n'.join(ru.get_exception_trace()) - to_fail.append(task) + for pid in staging_tasks: + for task,actionables in staging_tasks[pid]: + try: + self._handle_task(task, actionables) + self._advance_tasks([task], pid) + + except Exception as e: + # staging failed - do not pass task to agent + task['control'] = 'tmgr' + task['exception'] = repr(e) + task['exception_detail'] = '\n'.join(ru.get_exception_trace()) + to_fail.append(task) self._advance_tasks(to_fail, state=rps.FAILED, push=False) diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index fa184edb73..5f07ecb688 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -76,6 +76,8 @@ def __init__(self, sid, reg_addr, owner): self._prof.prof('init2', uid=self._uid, msg=self._cfg.path) + self._log.debug('=== cmgr %s (%s)', self._uid, self._owner) + # Every ComponentManager runs a HB pubsub bridge in a separate thread. # That HB channel should be used by all components and bridges created # under this CMGR. 
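# ------------------------------------------------------------------------------
# [editor's aside - not part of the patch series]  `_CloseOptions` above and
# the `MyConfig` / `MyRegistry` test doubles all follow the same
# `ru.TypedDict` pattern: `_schema` declares field types, `_defaults`
# backfills values, and a custom `_verify` hook runs when `verify()` is
# called (that linkage is assumed from the usage in this series).  A small
# sketch with illustrative names:

import radical.utils as ru

class ExampleOptions(ru.TypedDict):

    _schema   = {'cleanup'  : bool,
                 'terminate': bool}

    _defaults = {'cleanup'  : False,
                 'terminate': True}

    def _verify(self):
        # mirrors `_CloseOptions._verify`: `cleanup` implies `terminate`
        if self.get('cleanup') and not self.get('terminate'):
            self.terminate = True

opts = ExampleOptions({'cleanup': True, 'terminate': False})
opts.verify()
assert opts.terminate is True
# ------------------------------------------------------------------------------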
@@ -230,11 +232,11 @@ def start_components(self, components, cfg = None): ru.ID_CUSTOM, ns=self._sid) ccfg.uid = uid ccfg.kind = cname + ccfg.owner = self._owner ccfg.sid = self._cfg.sid ccfg.cmgr = self._cfg.uid ccfg.base = self._cfg.base ccfg.path = self._cfg.path - ccfg.owner = self._cfg.owner ccfg.reg_addr = self._cfg.reg_addr ccfg.proxy_url = self._cfg.proxy_url ccfg.heartbeat = self._cfg.heartbeat @@ -813,6 +815,7 @@ def get_input_ep(self, qname): cfg = self._reg['bridges'][qname] + self._log.debug('====== get input ep: %s', qname) return ru.zmq.Getter(qname, url=cfg['get']) @@ -1048,15 +1051,19 @@ def work_cb(self): # TODO: should a poller over all inputs, or better yet register # a callback + # import pprint + # pprint.pprint(self._inputs) + for name in self._inputs: + qname = self._inputs[name]['qname'] queue = self._inputs[name]['queue'] states = self._inputs[name]['states'] # FIXME: a simple, 1-thing caching mechanism would likely # remove the req/res overhead completely (for any # non-trivial worker). - things = queue.get_nowait(qname=name, timeout=200) # microseconds + things = queue.get_nowait(qname=qname, timeout=200) # microseconds # self._log.debug('work_cb %s: %s %s %d', name, queue.channel, # qname, len(things)) things = ru.as_list(things) @@ -1243,37 +1250,37 @@ def advance(self, things, state=None, publish=True, push=False, qname=None, # ts = time.time() if _state in rps.FINAL: # things in final state are dropped - # for thing in _things: - # self._log.debug('=== final %s [%s]', thing['uid'], _state) - # self._prof.prof('drop', uid=thing['uid'], state=_state, - # ts=ts) + for thing in _things: + self._log.debug('=== final %s [%s]', thing['uid'], _state) + self._prof.prof('drop', uid=thing['uid'], state=_state, + ts=ts) continue if _state not in self._outputs: # unknown target state -- error - # for thing in _things: - # self._log.debug("lost %s [%s] : %s", thing['uid'], - # _state, self._outputs) - # self._prof.prof('lost', uid=thing['uid'], state=_state, - # ts=ts) + for thing in _things: + self._log.debug("lost %s [%s] : %s", thing['uid'], + _state, self._outputs) + self._prof.prof('lost', uid=thing['uid'], state=_state, + ts=ts) continue if not self._outputs[_state]: # empty output -- drop thing - # for thing in _things: - # self._log.debug('=== drop %s [%s]', thing['uid'], _state) - # self._prof.prof('drop', uid=thing['uid'], state=_state, - # ts=ts) + for thing in _things: + self._log.debug('=== drop %s [%s]', thing['uid'], _state) + self._prof.prof('drop', uid=thing['uid'], state=_state, + ts=ts) continue output = self._outputs[_state] # push the thing down the drain - # self._log.debug('=== put bulk %s: %s: %s', _state, len(_things), - # output.channel) + self._log.debug('=== put bulk %s: %s: %s', _state, len(_things), + output.channel) output.put(_things, qname=qname) - ts = time.time() + # ts = time.time() # for thing in _things: # self._prof.prof('put', uid=thing['uid'], state=_state, # msg=output.name, ts=ts) From 7575bf77209edd8c14192112f16b315f3338143e Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Thu, 25 May 2023 10:10:27 +0200 Subject: [PATCH 053/171] update registry layout --- concepts/registry.txt | 60 ++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/concepts/registry.txt b/concepts/registry.txt index e7a8aed582..4ded51ca93 100644 --- a/concepts/registry.txt +++ b/concepts/registry.txt @@ -1,46 +1,42 @@ registry = { - '' : { - '' - 'heartbeat' : { ... 
}, - 'pilot.0000': { - 'description': { ... }, - 'rm_info' : { ... }, - 'agent.0' : { '' } - 'cfg' : { ... }, - 'rcfg' : { ... } + 'cfg' : { ... }, # session config for this module + 'rm_info' : { ... }, # only for agent modules + 'heartbeat' : { + 'addr_pub': ... , + 'addr_sub': ... , + 'cfg' : { + 'interval': ... , + 'timeout' : ... } - } -} - -SUB_REGISTRY = { - 'cmgr.0000' : { - 'heartbeat' : { + }, + 'bridges' : { + 'abc_pubsub': { 'addr_pub': ... , 'addr_sub': ... , 'cfg' : { - 'interval': ... , - 'timeout' : ... + 'log_lvl' : ... , + 'hwm' : ... , + 'bulk_size': ... } }, - 'bridges' : { - 'state_pubsub': { - 'addr_pub': ... , - 'addr_sub': ... , - 'cfg' : { - 'log_lvl' : ..., - 'hwm' : ..., - 'bulk_size': ... - } + 'xyz_queue': { + 'addr_put': ... , + 'addr_get': ... , + 'cfg' : { + 'log_lvl' : ... , + 'hwm' : ... , + 'bulk_size': ... } - }, - 'components': { - 'staging.0001': { - 'cfg': { - 'log_lvl': ... - } + } + }, + 'components': { + 'staging.0001': { + 'cfg': { + 'log_lvl': ... } } } + 'rcfgs' : { ... } } From 294c37373d144bb94280d24c0605d902668a6abc Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Fri, 26 May 2023 11:10:56 +0200 Subject: [PATCH 054/171] snap --- bin/radical-pilot-agent | 17 +--- src/radical/pilot/agent/agent_0.py | 71 ++++++----------- src/radical/pilot/agent/agent_n.py | 14 ++-- src/radical/pilot/agent/executing/base.py | 2 +- src/radical/pilot/session.py | 95 ++++++++++++++++++----- src/radical/pilot/utils/component.py | 4 +- 6 files changed, 117 insertions(+), 86 deletions(-) diff --git a/bin/radical-pilot-agent b/bin/radical-pilot-agent index e28e268553..126e909a9a 100755 --- a/bin/radical-pilot-agent +++ b/bin/radical-pilot-agent @@ -41,22 +41,13 @@ def bootstrap_3(aid): cfg = ru.Config(path='%s.cfg' % aid) - # this script runs the registry for the agent instances - reg_service = ru.zmq.Registry(uid=aid + '.reg') - reg_service.start() - - cfg.reg_addr = reg_service.addr - - assert cfg.reg_addr - - # start a non-primary session (the agents will own their cmgrs) - session = rp.Session(cfg=cfg, _role=rp.Session._AGENT, _reg_addr=cfg.reg_addr) - cfg.uid = aid cfg.aid = aid # used by executor - if aid == 'agent.0': agent = rp.Agent_0(cfg, session) - else : agent = rp.Agent_n(cfg, session) + if aid == 'agent.0': + agent = rp.Agent_0(cfg) + else: + agent = rp.Agent_n(cfg) agent.start() diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index a5933547bc..b0ebd29b43 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -40,7 +40,7 @@ class Agent_0(rpu.Worker): # -------------------------------------------------------------------------- # - def __init__(self, cfg: ru.Config, session: Session): + def __init__(self, cfg: ru.Config): self._uid = 'agent.0' self._pid = cfg.pid @@ -48,24 +48,14 @@ def __init__(self, cfg: ru.Config, session: Session): self._pmgr = cfg.pmgr self._pwd = cfg.pilot_sandbox - self._session = session + self._session = Session(uid=cfg.sid, cfg=cfg, _role=Session._AGENT_0) + self._cfg = self._session._cfg + self._rcfg = self._session._rcfg self._log = ru.Logger(self._uid, ns='radical.pilot') self._starttime = time.time() self._final_cause = None - # pick up proxy config from session - self._cfg.proxy = self._session._cfg.proxy - - # extract bridges, components and resource_cfg subsections from the cfg - self._bcfg = cfg.bridges - self._ccfg = cfg.components - self._rcfg = cfg.resource_cfg - - del cfg['bridges'] - del cfg['components'] - del cfg['resource_cfg'] - # keep some 
state about service startups
        self._service_uids_launched = list()
        self._service_uids_running  = list()

@@ -76,11 +66,7 @@ def __init__(self, cfg: ru.Config):
         self._prof.prof('hostname', uid=cfg.pid, msg=ru.get_hostname())
 
         # init the worker / component base classes, connects registry
-        rpu.Worker.__init__(self, cfg, session)
-
-        # store the agent config in the registry
-        self._reg['cfg']  = self._cfg
-        self._reg['rcfg'] = self._rcfg
+        rpu.Worker.__init__(self, cfg, self._session)
 
         # configure ResourceManager before component startup, as components need
         # ResourceManager information for function (scheduler, executor)
@@ -89,16 +75,6 @@ def __init__(self, cfg: ru.Config):
         # ensure that app communication channels are visible to workload
         self._configure_app_comm()
 
-        # ready to configure agent components
-        self._cmgr = rpu.ComponentManager(self._cfg.sid, self._cfg.reg_addr,
-                                          self._uid)
-
-        self._cmgr.start_bridges(self._bcfg)
-        self._cmgr.start_components(self._ccfg)
-
-        # connect to proxy communication channels, maybe
-        self._connect_proxy()
-
         # before we run any tasks, prepare a named_env `rp` for tasks which use
         # the pilot's own environment, such as raptors or RP service tasks
         env_spec = {'type': os.environ['RP_VENV_TYPE'],
@@ -149,6 +125,9 @@ def __init__(self, cfg: ru.Config):
         # as long as we are alive, we also want to keep the proxy alive
         self._session._run_proxy_hb()
 
+        # all set up - connect to proxy to fetch / push tasks
+        self._connect_proxy()
+
 
     # --------------------------------------------------------------------------
     #
@@ -258,7 +237,7 @@ def _configure_rm(self):
         # use for sub-agent startup. Add the remaining ResourceManager
         # information to the config, for the benefit of the scheduler).
 
-        self._rm = ResourceManager.create(name=self._cfg.resource_manager,
+        self._rm = ResourceManager.create(name=self._rcfg.resource_manager,
                                           cfg=self._cfg, rcfg=self._rcfg,
                                           log=self._log, prof=self._prof)
 
@@ -273,29 +252,27 @@ def _configure_app_comm(self):
         # channels, merge those into the agent config
         #
         # FIXME: this needs to start the app_comm bridges
-        app_comm = self._cfg.get('app_comm')
+        app_comm = self._rcfg.get('app_comm')
         if app_comm:
+
+            # bridge addresses also need to be exposed to the workload
+            if 'task_environment' not in self._rcfg:
+                self._rcfg['task_environment'] = dict()
+
             if isinstance(app_comm, list):
                 app_comm = {ac: {'bulk_size': 0,
                                  'stall_hwm': 1,
                                  'log_level': 'error'} for ac in app_comm}
             for ac in app_comm:
-                if ac in self._cfg['bridges']:
+                AC = ac.upper()
+
+                if ac in self._reg['bridges']:
                     raise ValueError('reserved app_comm name %s' % ac)
-                self._cfg['bridges'][ac] = app_comm[ac]
+                self._reg['bridges.%s' % ac] = app_comm[ac]
 
-            # some of the bridge addresses also need to be exposed to the workload
-        if app_comm:
-            if 'task_environment' not in self._cfg:
-                self._cfg['task_environment'] = dict()
-            for ac in app_comm:
-                if ac not in self._cfg['bridges']:
-                    raise RuntimeError('missing app_comm %s' % ac)
-                self._cfg['task_environment']['RP_%s_IN' % ac.upper()] = \
-                    self._cfg['bridges'][ac]['addr_in']
-                self._cfg['task_environment']['RP_%s_OUT' % ac.upper()] = \
-                    self._cfg['bridges'][ac]['addr_out']
+                self._rcfg['task_environment']['RP_%s_IN'  % AC] = app_comm[ac]['addr_in']
+                self._rcfg['task_environment']['RP_%s_OUT' % AC] = app_comm[ac]['addr_out']
 
 
     # --------------------------------------------------------------------------
@@ -408,13 +385,13 @@ def _write_sa_configs(self):
 
         # sub-agent config files.
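#       Note on the task_environment entries set in _configure_app_comm above:
#       a task can later pick up the published app_comm endpoints from its
#       environment. A hypothetical sketch, assuming an app_comm channel named
#       'abc_pubsub' (the variable names follow the RP_%s_IN / RP_%s_OUT
#       assignments above):
#
#           import os
#           addr_in  = os.environ['RP_ABC_PUBSUB_IN']
#           addr_out = os.environ['RP_ABC_PUBSUB_OUT']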
# write deep-copies of the config for each sub-agent (sans from agent.0) - for sa in self._cfg.get('agents', {}): + for sa in self._rcfg.get('agents', {}): assert (sa != 'agent.0'), 'expect subagent, not agent.0' # use our own config sans agents/components/bridges as a basis for # the sub-agent config. - tmp_cfg = copy.deepcopy(self._cfg) + tmp_cfg = copy.deepcopy(self._session._cfg) tmp_cfg['agents'] = dict() tmp_cfg['components'] = dict() tmp_cfg['bridges'] = dict() @@ -431,7 +408,7 @@ def _write_sa_configs(self): # def _start_services(self): - service_descriptions = self._cfg.services + service_descriptions = self._rcfg.services if not service_descriptions: return self._log.info('starting agent services') diff --git a/src/radical/pilot/agent/agent_n.py b/src/radical/pilot/agent/agent_n.py index f54c0df90c..caf41b0d24 100644 --- a/src/radical/pilot/agent/agent_n.py +++ b/src/radical/pilot/agent/agent_n.py @@ -5,7 +5,9 @@ import time import radical.utils as ru -from .. import utils as rpu +from .. import utils as rpu + +from .. import Session # ------------------------------------------------------------------------------ @@ -18,7 +20,7 @@ class Agent_n(rpu.Worker): # -------------------------------------------------------------------------- # - def __init__(self, cfg, session): + def __init__(self, cfg): self._cfg = cfg self._sid = cfg.sid @@ -28,11 +30,11 @@ def __init__(self, cfg, session): self._sid = cfg.sid self._reg_addr = cfg.reg_addr + self._session = Session(uid=cfg.sid, cfg=cfg, _role=Session._AGENT_N) # log / profile via session until component manager is initialized - self._session = session - self._log = session._log - self._prof = session._prof + self._log = self._session._log + self._prof = self._session._prof self._starttime = time.time() self._final_cause = None @@ -44,7 +46,7 @@ def __init__(self, cfg, session): # at this point the session is up and connected, and it should have # brought up all communication bridges and components. We are # ready to rumble! 
-        rpu.Worker.__init__(self, self._cfg, session)
+        rpu.Worker.__init__(self, self._cfg, self._session)
 
 
 # --------------------------------------------------------------------------
diff --git a/src/radical/pilot/agent/executing/base.py b/src/radical/pilot/agent/executing/base.py
index a7d0c8f30c..fa585b8450 100644
--- a/src/radical/pilot/agent/executing/base.py
+++ b/src/radical/pilot/agent/executing/base.py
@@ -50,7 +50,7 @@ def create(cls, cfg, session):
         if cls != AgentExecutingComponent:
             raise TypeError('Factory only available to base class!')
 
-        name = session._reg['rcfg.agent_spawner']
+        name = session._rcfg['agent_spawner']
 
         from .popen import Popen
         from .flux  import Flux
diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py
index 9fda2b6dc0..a7bba46830 100644
--- a/src/radical/pilot/session.py
+++ b/src/radical/pilot/session.py
@@ -168,6 +168,7 @@ def __init__(self, proxy_url: Optional[str ] = None,
         self._cfg           = ru.Config(cfg=cfg)
         self._reg_addr      = _reg_addr
         self._proxy_url     = proxy_url
+        self._proxy_cfg     = None
         self._closed        = False
         self._created       = time.time()
         self._close_options = _CloseOptions(close_options)
@@ -278,9 +279,10 @@ def _init_agent_0(self):
         #   - connects to the ZMQ proxy for client/agent communication
         #   - start agent components
 
+        self._init_cfg_from_dict()
         self._start_registry()
         self._connect_registry()
-        self._init_cfg_from_registry()
+        self._publish_cfg()
         self._connect_proxy()
         self._init_components()
 
@@ -326,6 +328,7 @@ def _start_registry(self):
 
         self._cfg.reg_addr = self._reg_service.addr
 
+
     # --------------------------------------------------------------------------
     #
     def _connect_registry(self):
@@ -361,6 +364,7 @@ def _init_cfg_from_scratch(self):
                              cfg=self._cfg)
 
         # load the resource configs
+        self._rcfg  = ru.Config()  # the local resource config, if known
         self._rcfgs = ru.Config('radical.pilot.resource', name='*',
                                 expand=False)
         # expand rcfgs for all schema options
@@ -409,12 +413,58 @@ def _init_cfg_from_scratch(self):
         self._rep.plain('[%s]' % self._uid)
 
 
+    # --------------------------------------------------------------------------
+    #
+    def _init_cfg_from_dict(self):
+
+        # An agent_0 session will read the configuration from agent_0.cfg and
+        # pass it to the session.
+
+        assert self._role == self._AGENT_0
+
+        self._cfg = ru.Config(cfg=self._cfg)
+
+        # we only have one resource config for the current resource
+        self._rcfg  = ru.Config(cfg=self._cfg.resource_cfg)  # local config
+        self._rcfgs = ru.Config()
+
+        del self._cfg['resource_cfg']
+
+        # set essential config values for *this* specific session
+        pwd = os.getcwd()
+
+        if not self._cfg.base:
+            self._cfg.base = pwd
+
+        if not self._cfg.path:
+            self._cfg.path = pwd
+
+        # change RU defaults to point logfiles etc. to the session sandbox
+        def_cfg             = ru.DefaultConfig()
+        def_cfg.log_dir     = self._cfg.path
+        def_cfg.report_dir  = self._cfg.path
+        def_cfg.profile_dir = self._cfg.path
+
+        self._prof = self._get_profiler(name=self._uid)
+        self._rep  = self._get_reporter(name=self._uid)
+        self._log  = self._get_logger  (name=self._uid,
+                                        level=self._cfg.get('debug'))
+
+        from . 
import version_detail as rp_version_detail + self._log.info('radical.pilot version: %s', rp_version_detail) + self._log.info('radical.saga version: %s', rs.version_detail) + self._log.info('radical.utils version: %s', ru.version_detail) + + self._prof.prof('session_start', uid=self._uid) + + # -------------------------------------------------------------------------- # def _init_cfg_from_registry(self): # fetch config settings from the registry self._cfg = ru.Config(cfg=self._reg['cfg']) + self._rcfg = ru.Config(cfg=self._reg['rcfg']) self._rcfgs = ru.Config(cfg=self._reg['rcfgs']) # change RU defaults to point logfiles etc. to the session sandbox @@ -449,15 +499,25 @@ def _publish_cfg(self): # push proxy, bridges, components and heartbeat subsections separately flat_cfg = copy.deepcopy(self._cfg) - proxy_cfg = self._cfg.proxy del flat_cfg['bridges'] del flat_cfg['components'] self._reg['cfg'] = flat_cfg - self._reg['bridges'] = proxy_cfg + self._reg['bridges'] = self._cfg.bridges # proxy bridges self._reg['components'] = {} - self._reg['rcfgs'] = self._rcfgs + + # primary sessions publish all known resource configs under `rcfgs`, the + # agent_0 only publishes the *current* resource config under `rcfg`. + if self._role == self._PRIMARY: + self._reg['rcfg'] = dict() + self._reg['rcfgs'] = self._rcfgs + + elif self._role == self._AGENT_0: + self._reg['rcfg'] = self._rcfg + self._reg['rcfgs'] = dict() + + self._reg.dump('published') # -------------------------------------------------------------------------- @@ -499,11 +559,7 @@ def _start_proxy(self): # configure proxy channels try: self._proxy = ru.zmq.Client(url=self._cfg.proxy_url) - proxy_cfg = self._proxy.request('register', {'sid': self._uid}) - - self._cfg.proxy = ru.Config(cfg=proxy_cfg) - - self._log.debug('proxy config: %s', self._cfg.proxy) + self._proxy_cfg = self._proxy.request('register', {'sid': self._uid}) except: self._log.exception('%s: failed to start proxy', self._role) @@ -517,16 +573,12 @@ def _connect_proxy(self): assert self._role == self._AGENT_0 # make sure we have a proxy address to use - assert self._proxy_url + assert self._cfg.proxy_url # query the proxy service to fetch proxy cfg created by primary session self._proxy = ru.zmq.Client(url=self._cfg.proxy_url) - proxy_cfg = self._proxy.request('lookup', {'sid': self._uid}) - self._log.debug('proxy response: %s', proxy_cfg) - - # push the proxy config as bridge to the agent_0 registry - for channel in proxy_cfg: - self._reg['bridges.%s' % channel] = proxy_cfg[channel] + self._proxy_cfg = self._proxy.request('lookup', {'sid': self._uid}) + self._log.debug('proxy response: %s', self._proxy_cfg) # -------------------------------------------------------------------------- @@ -542,6 +594,13 @@ def _init_components(self): self._cmgr.start_bridges(self._cfg.bridges) self._cmgr.start_components(self._cfg.components) + # if we have proxy channels, publish them in the bridges configs too + # push the proxy config as bridge to the agent_0 registry + if self._proxy_cfg: + for channel in self._proxy_cfg: + self._reg['bridges.%s' % channel] = self._proxy_cfg[channel] + + self._reg.dump('%s.bridges' % self._role) # make sure we send heartbeats to the proxy @@ -557,7 +616,7 @@ def fwd_control(topic, msg): self._proxy_ctrl_pub = ru.zmq.Publisher( channel=rpc.PROXY_CONTROL_PUBSUB, - url=self._cfg.proxy.proxy_control_pubsub.pub, + url=self._reg['bridges.proxy_control_pubsub.pub'], path=self._cfg.path) self._ctrl_sub = ru.zmq.Subscriber( @@ -579,7 +638,7 @@ def fwd_state(topic, 
msg): self._proxy_state_sub = ru.zmq.Subscriber( channel=rpc.PROXY_STATE_PUBSUB, - url=self._cfg.proxy.proxy_state_pubsub.sub, + url=self._reg['bridges.proxy_state_pubsub.sub'], path=self._cfg.path) self._proxy_state_sub.subscribe(rpc.PROXY_STATE_PUBSUB, fwd_state) diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index 5f07ecb688..a649b8ea66 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -537,7 +537,9 @@ def create(cfg, session): assert cfg.kind in comp, '%s not in %s (%s)' % (cfg.kind, list(comp.keys()), cfg.uid) - session._log.debug('create 1 %s: %s', cfg.kind, comp[cfg.kind]) + import pprint + session._log.debug('create 1 %s: %s', cfg.kind, + pprint.pformat(cfg.as_dict())) return comp[cfg.kind].create(cfg, session) From 5f6d79468d8ed2b2f04670089e001f9cf92ec5fc Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Tue, 30 May 2023 17:45:07 +0200 Subject: [PATCH 055/171] end-to-end --- src/radical/pilot/agent/agent_0.py | 13 ++++--------- src/radical/pilot/agent/bootstrap_0.sh | 8 ++++---- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index b0ebd29b43..6c2d1c7ec4 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -75,12 +75,6 @@ def __init__(self, cfg: ru.Config): # ensure that app communication channels are visible to workload self._configure_app_comm() - # before we run any tasks, prepare a named_env `rp` for tasks which use - # the pilot's own environment, such as raptors or RP service tasks - env_spec = {'type': os.environ['RP_VENV_TYPE'], - 'path': os.environ['RP_VENV_PATH']} - self._prepare_env('rp', env_spec) - # start any services if they are requested self._start_services() @@ -150,9 +144,9 @@ def _hb_term_cb(self, msg=None): # def _connect_proxy(self): - # write config files for proxy channels - for p in self._cfg.proxy: - ru.write_json('%s.cfg' % p, self._cfg.proxy[p]) + # # write config files for proxy channels + # for p in self._cfg.proxy: + # ru.write_json('%s.cfg' % p, self._cfg.proxy[p]) # listen for new tasks from the client self.register_input(rps.AGENT_STAGING_INPUT_PENDING, @@ -237,6 +231,7 @@ def _configure_rm(self): # use for sub-agent startup. Add the remaining ResourceManager # information to the config, for the benefit of the scheduler). + self._cfg.reg_addr = self._session.reg_addr self._rm = ResourceManager.create(name=self._rcfg.resource_manager, cfg=self._cfg, rcfg=self._rcfg, log=self._log, prof=self._prof) diff --git a/src/radical/pilot/agent/bootstrap_0.sh b/src/radical/pilot/agent/bootstrap_0.sh index 91538269a5..12bcbc9a1f 100755 --- a/src/radical/pilot/agent/bootstrap_0.sh +++ b/src/radical/pilot/agent/bootstrap_0.sh @@ -1650,10 +1650,10 @@ get_tunnel(){ if test -z "$BIND_ADDRESS" then - BIND_ADDRESS=$(ip addr - | grep 'state UP' -A2 - | grep 'inet' - | awk '{print $2}' + BIND_ADDRESS=$(ip addr \ + | grep 'state UP' -A2 \ + | grep 'inet' \ + | awk '{print $2}' \ | cut -f1 -d'/') # BIND_ADDRESS="127.0.0.1" fi From b6896f4ee453edf2b2814a2d50619e150e16bded Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Tue, 30 May 2023 17:55:55 +0200 Subject: [PATCH 056/171] wait for all tasks, not only last chunk! 
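Note: `submit_tasks` creates task objects in bulks, so after the submission
loop the local `tasks` list only holds the final bulk, while `ret` accumulates
all of them. A toy illustration of that pattern (values and bulk size are made
up; the bulking loop itself is not part of this patch):

    descriptions = list(range(10))
    bulk_size    = 4

    ret = list()
    for i in range(0, len(descriptions), bulk_size):
        tasks = descriptions[i:i + bulk_size]    # only the current bulk
        ret  += tasks                            # everything submitted so far

    assert len(tasks) == 2                       # last bulk only
    assert len(ret)   == 10                      # what the bookkeeping needs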
--- examples/00_getting_started.py | 2 +- src/radical/pilot/task_manager.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index 2cad3b9fa3..bcb05f24c8 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -67,7 +67,7 @@ pilot = pmgr.submit_pilots(pdesc) - n = 16 # number of tasks to run + n = 1024 # number of tasks to run report.header('submit %d tasks' % n) # Register the pilot in a TaskManager object. diff --git a/src/radical/pilot/task_manager.py b/src/radical/pilot/task_manager.py index 8d96f8791b..61a782bda6 100644 --- a/src/radical/pilot/task_manager.py +++ b/src/radical/pilot/task_manager.py @@ -854,7 +854,7 @@ def submit_tasks(self, descriptions): # keep tasks around with self._tasks_lock: - for task in tasks: + for task in ret: self._tasks[task.uid] = task self._rep.progress_done() From 60218abcbed4a3851b3bb9f6324d5872cf0c84a1 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 31 May 2023 22:24:02 +0200 Subject: [PATCH 057/171] snapshot --- examples/00_getting_started.py | 4 +- src/radical/pilot/agent/agent_0.py | 4 +- src/radical/pilot/pmgr/launching/base.py | 2 - src/radical/pilot/session.py | 8 +- .../pilot/tmgr/staging_input/default.py | 1 - src/radical/pilot/utils/__init__.py | 17 +- src/radical/pilot/utils/component.py | 321 +++--------------- 7 files changed, 75 insertions(+), 282 deletions(-) diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index bcb05f24c8..278afdcd1f 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -60,14 +60,14 @@ 'project' : config.get('project'), 'queue' : config.get('queue'), 'access_schema' : config.get('schema'), - 'cores' : config.get('cores', 1), + 'cores' : 1024, 'gpus' : config.get('gpus', 0) } pdesc = rp.PilotDescription(pd_init) pilot = pmgr.submit_pilots(pdesc) - n = 1024 # number of tasks to run + n = 16 * 1024 # number of tasks to run report.header('submit %d tasks' % n) # Register the pilot in a TaskManager object. 
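A note on the hunks below: task traffic between client and agent flows through
qname-routed ZMQ queues. A minimal sketch of that pattern, assuming the
`ru.zmq` Putter/Getter API as used in utils/component.py; the endpoint URLs
and ids are illustrative only:

    import radical.utils as ru

    putter = ru.zmq.Putter('proxy_task_queue', url='tcp://127.0.0.1:10004')
    getter = ru.zmq.Getter('proxy_task_queue', url='tcp://127.0.0.1:10005')

    sid = 'rp.session.0000'
    putter.put([{'uid': 'task.000000'}], qname=sid)  # tag bulk for one consumer
    things = ru.as_list(getter.get_nowait(qname=sid, timeout=200))  # microseconds

Only a getter asking for that same qname receives the tagged bulk.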
diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 6c2d1c7ec4..72fd4e84f5 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -211,7 +211,7 @@ def _proxy_input_cb(self, msg): def _proxy_output_cb(self, msg): # we just forward the tasks to the task proxy queue - self._log.debug('=== proxy output cb: %s', len(msg)) + self._log.debug('proxy output cb: %s', len(msg)) self.advance(msg, publish=False, push=True, qname=self._sid) @@ -649,7 +649,7 @@ def _proxy_control_cb(self, topic, msg): env_spec = arg for env_id in env_spec: - # ensure we have a hb period + # ensure we have a full hb period for the prep_env call self._hb.beat(uid=self._pmgr) self._prepare_env(env_id, env_spec[env_id]) return True diff --git a/src/radical/pilot/pmgr/launching/base.py b/src/radical/pilot/pmgr/launching/base.py index f50946084c..607a4f21d7 100644 --- a/src/radical/pilot/pmgr/launching/base.py +++ b/src/radical/pilot/pmgr/launching/base.py @@ -283,8 +283,6 @@ def _kill_pilots(self, pids): # def work(self, pilots): - self._log.debug('==== %s', pilots) - if not isinstance(pilots, list): pilots = [pilots] diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index a7bba46830..5afb5d9618 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -484,7 +484,7 @@ def _init_cfg_from_registry(self): self._log.info('radical.saga version: %s', rs.version_detail) self._log.info('radical.utils version: %s', ru.version_detail) - self._log.debug('=== Session(%s, %s)', self._uid, self._role) + self._log.debug('Session(%s, %s)', self._uid, self._role) self._prof.prof('session_start', uid=self._uid) @@ -587,6 +587,8 @@ def _init_components(self): assert self._role in [self._PRIMARY, self._AGENT_0, self._AGENT_N] + self._reg.dump('init') + # primary sessions and agents have a component manager which also # manages heartbeat. 
'self._cmgr.close()` should be called during # termination @@ -608,7 +610,7 @@ def _init_components(self): # forward any control messages to the proxy def fwd_control(topic, msg): - self._log.debug('=== fwd control %s: %s', topic, msg) + self._log.debug('fwd control %s: %s', topic, msg) self._proxy_ctrl_pub.put(rpc.PROXY_CONTROL_PUBSUB, msg) ru.write_json(fname='foo.json', data=self._reg['bridges']) @@ -628,7 +630,7 @@ def fwd_control(topic, msg): # collect any state updates from the proxy def fwd_state(topic, msg): - self._log.debug('=== fwd state %s: %s', topic, msg) + self._log.debug('fwd state %s: %s', topic, msg) self._state_pub.put(topic, msg) self._state_pub = ru.zmq.Publisher( diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py index ab33f8a99d..9d03fd6bed 100644 --- a/src/radical/pilot/tmgr/staging_input/default.py +++ b/src/radical/pilot/tmgr/staging_input/default.py @@ -131,7 +131,6 @@ def _advance_tasks(self, tasks, pid=None, state=None, push=True): # perform and publish state update # push to the proxy queue - self._log.debug('=== send to pq: %d', len(tasks)) self.advance(tasks, state, publish=True, push=push, qname=pid) diff --git a/src/radical/pilot/utils/__init__.py b/src/radical/pilot/utils/__init__.py index 4f6f2ab917..120969e3a1 100644 --- a/src/radical/pilot/utils/__init__.py +++ b/src/radical/pilot/utils/__init__.py @@ -24,19 +24,22 @@ import resource _limits = list(resource.getrlimit(resource.RLIMIT_NOFILE)) _limits[0] = 512 - resource.setrlimit(resource.RLIMIT_NOFILE, _limits) + resource.setrlimit(resource.RLIMIT_NOFILE, tuple(_limits)) + except: pass # ------------------------------------------------------------------------------ # -from .db_utils import * -from .prof_utils import * -from .misc import * -from .session import * -from .component import * -from .serializer import * +from .db_utils import * +from .prof_utils import * +from .misc import * +from .session import * +from .component import * +from .component_manager import * +from .serializer import * + # ------------------------------------------------------------------------------ diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index a649b8ea66..6e947cc673 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -34,250 +34,6 @@ def _atfork_child(): ru.atfork(ru.noop, ru.noop, _atfork_child) -# ------------------------------------------------------------------------------ -# -class ComponentManager(object): - ''' - RP spans a hierarchy of component instances: the application has a pmgr and - tmgr, and the tmgr has a staging component and a scheduling component, and - the pmgr has a launching component, and components also can have bridges, - etc. This ComponentManager centralises the code needed to spawn, manage and - terminate such components. Any code which needs to create component should - create a ComponentManager instance and pass the required component and - bridge layout and configuration. Callng `stop()` on the cmgr will terminate - the components and brisged. 
- ''' - - # -------------------------------------------------------------------------- - # - def __init__(self, sid, reg_addr, owner): - - # register for at-fork hooks - _components.append(self) - - # create a registry client to obtain the session config and to store - # component and bridge configs - - self._sid = sid - self._reg_addr = reg_addr - self._owner = owner - - self._reg = ru.zmq.RegistryClient(url=self._reg_addr) - self._cfg = ru.Config(from_dict=self._reg['cfg']) - - self._uid = ru.generate_id('cmgr.%(item_counter)04d', - ru.ID_CUSTOM, ns=self._sid) - self._uids = [self._uid] # uids to track hartbeats for (incl. own) - - self._prof = ru.Profiler(self._uid, ns='radical.pilot', - path=self._cfg.path) - self._log = ru.Logger(self._uid, ns='radical.pilot', - path=self._cfg.path) - - self._prof.prof('init2', uid=self._uid, msg=self._cfg.path) - - self._log.debug('=== cmgr %s (%s)', self._uid, self._owner) - - # Every ComponentManager runs a HB pubsub bridge in a separate thread. - # That HB channel should be used by all components and bridges created - # under this CMGR. - bcfg = ru.Config(cfg={'channel' : 'heartbeat', - 'type' : 'pubsub', - 'uid' : self._uid + '.hb', - 'stall_hwm' : 1, - 'bulk_size' : 0, - 'reg_addr' : self._cfg.reg_addr, - 'path' : self._cfg.path}) - self._hb_bridge = ru.zmq.PubSub(channel='heartbeat', cfg=bcfg) - self._hb_bridge.start() - - hb_cfg = ru.TypedDict(self._reg['cfg.heartbeat']) - hb_cfg.addr_pub = str(self._hb_bridge.addr_pub) - hb_cfg.addr_sub = str(self._hb_bridge.addr_sub) - - # publish heartbeat information in registry - self._cfg.heartbeat = hb_cfg - self._reg['cfg.heartbeat'] = hb_cfg - - # runs a HB monitor on that channel - self._hb = ru.Heartbeat(uid=self.uid, - timeout=hb_cfg['timeout'], - interval=hb_cfg['interval'], - beat_cb=self._hb_beat_cb, # on every heartbeat - term_cb=self._hb_term_cb, # on termination - log=self._log) - - self._hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg['addr_pub'], - log=self._log, prof=self._prof) - self._hb_sub = ru.zmq.Subscriber('heartbeat', hb_cfg['addr_sub'], - topic='heartbeat', cb=self._hb_sub_cb, - log=self._log, prof=self._prof) - - - # confirm the bridge being usable by listening to our own heartbeat - self._hb.start() - self._hb.wait_startup(self._uid, hb_cfg['timeout']) - self._log.info('heartbeat system up') - - - - # -------------------------------------------------------------------------- - # - def _hb_sub_cb(self, topic, msg): - ''' - keep track of heartbeats for all bridges/components we know - ''' - - # self._log.debug('hb_sub %s: get %s check', self.uid, msg['uid']) - if msg['uid'] in self._uids: - # self._log.debug('hb_sub %s: get %s used', self.uid, msg['uid']) - self._hb.beat(uid=msg['uid']) - - - # -------------------------------------------------------------------------- - # - def _hb_beat_cb(self): - ''' - publish own heartbeat on the hb channel - ''' - - self._hb_pub.put('heartbeat', msg={'uid' : self.uid}) - # self._log.debug('hb_cb %s: put %s', self.uid, self.uid) - - - # -------------------------------------------------------------------------- - # - def _hb_term_cb(self, uid=None): - - self._log.debug('hb_term %s: %s died', self.uid, uid) - self._prof.prof('term', uid=self._uid) - - # FIXME: restart goes here - - # NOTE: returning `False` indicates failure to recover. The HB will - # terminate and suicidally kill the very process it is living in. - # Make sure all required cleanup is done at this point! 
- - return False - - - # -------------------------------------------------------------------------- - # - @property - def uid(self): - return self._uid - - - # -------------------------------------------------------------------------- - # - def start_bridges(self, bridges): - - self._prof.prof('start_bridges_start', uid=self._uid) - - timeout = self._cfg.heartbeat.timeout - - for bname, bcfg in bridges.items(): - - bcfg.uid = bname - bcfg.channel = bname - bcfg.cmgr = self.uid - bcfg.owner = self._owner - bcfg.sid = self._cfg.sid - bcfg.path = self._cfg.path - bcfg.reg_addr = self._cfg.reg_addr - bcfg.heartbeat = self._cfg.heartbeat - - self._reg['bridges.%s.cfg' % bname] = bcfg - - # self._reg.put('bridge.%s' % bname, bcfg) - - self._log.info('create bridge %s [%s]', bname, bcfg.uid) - - cmd = 'radical-pilot-bridge %s %s %s' \ - % (self._sid, self._reg.url, bname) - out, err, ret = ru.sh_callout(cmd, cwd=self._cfg.path) - - self._log.debug('bridge startup out: %s', out) - self._log.debug('bridge startup err: %s', err) - - if ret: - raise RuntimeError('bridge startup failed') - - self._uids.append(bname) - self._log.info('created bridge %s [%s]', bname, bname) - - # all bridges are started, wait for their heartbeats - # self._log.debug('wait for %s', self._uids) - failed = self._hb.wait_startup(self._uids, timeout=timeout) - # self._log.debug('waited for %s: %s', self._uids, failed) - if failed: - raise RuntimeError('could not start all bridges %s' % failed) - - self._prof.prof('start_bridges_stop', uid=self._uid) - - - # -------------------------------------------------------------------------- - # - def start_components(self, components, cfg = None): - - self._prof.prof('start_components_start: %s', uid=self._uid) - - timeout = self._cfg.heartbeat.timeout - - for cname, ccfg in components.items(): - - for _ in range(ccfg.get('count', 1)): - - uid = ru.generate_id(cname + '.%(item_counter)04d', - ru.ID_CUSTOM, ns=self._sid) - ccfg.uid = uid - ccfg.kind = cname - ccfg.owner = self._owner - ccfg.sid = self._cfg.sid - ccfg.cmgr = self._cfg.uid - ccfg.base = self._cfg.base - ccfg.path = self._cfg.path - ccfg.reg_addr = self._cfg.reg_addr - ccfg.proxy_url = self._cfg.proxy_url - ccfg.heartbeat = self._cfg.heartbeat - - if cfg: - ru.dict_merge(ccfg, cfg, ru.OVERWRITE) - - self._reg['components.%s.cfg' % uid] = ccfg - - self._log.info('create component %s [%s]', cname, uid) - - cmd = 'radical-pilot-component %s %s %s' \ - % (self._sid, self._reg.url, uid) - out, err, ret = ru.sh_callout(cmd, cwd=self._cfg.path) - - self._log.debug('component startup out: %s' , out) - self._log.debug('component startup err: %s' , err) - - if ret: - raise RuntimeError('component startup failed') - - self._uids.append(uid) - self._log.info('created component %s [%s]', cname, uid) - - # all components should start now, for their heartbeats - # to appear. - failed = self._hb.wait_startup(self._uids, timeout=timeout * 10) - if failed: - raise RuntimeError('could not start all components %s' % failed) - - self._prof.prof('start_components_stop', uid=self._uid) - - - # -------------------------------------------------------------------------- - # - def close(self): - - self._prof.prof('close', uid=self._uid) - - self._hb_bridge.stop() - self._hb.stop() # ------------------------------------------------------------------------------ @@ -343,18 +99,11 @@ def __init__(self, cfg, session): to a file name to be opened as `ru.Config`, or as a pre-populated `ru.Config` instance). 
That config MUST contain a session ID (`sid`) for the session under which to run this component, and a uid for the component - itself which MUST be unique within the scope of the given session. It MUST - further contain information about the session's heartbeat ZMQ pubsub channel - (`hb_pub`, `hb_sub`) on which heartbeats are sent and received for lifetime - management. All components and the session will continuously sent heartbeat - messages on that channel - missing heartbeats will by default lead to - session termination. - - The config MAY contain `bridges` and `component` sections. If those exist, - the component will start the communication bridges and the components - specified therein, and is then considered an owner of those components and - bridges. As such, it much watch the HB channel for heartbeats from those - components, and must terminate itself if those go AWOL. + itself which MUST be unique within the scope of the given session. + + All components and the component managers will continuously sent heartbeat + messages on the control pubsub - missing heartbeats will by default lead to + component termination. Further, the class must implement the registered work methods, with a signature of:: @@ -407,6 +156,9 @@ def __init__(self, cfg, session): constructor. ''' + # register for at-fork hooks + _components.append(self) + # NOTE: a fork will not duplicate any threads of the parent process -- # but it will duplicate any locks which are shared between the # parent process and its threads -- and those locks might be in @@ -462,8 +214,21 @@ def __init__(self, cfg, session): # def start(self): + # start heartbeat monitor + self._hb = ru.Heartbeat(uid=self.uid, + timeout=self._reg['cfg.heartbeat.timeout'], + interval=self._reg['cfg.heartbeat.interval'], + beat_cb=self._hb_beat_cb, # on every heartbeat + term_cb=self._hb_term_cb, # on termination + log=self._log) + + # heartbeat watches our own cmgr + self._log.debug('=== hb watch %s', self._owner) + self._hb.beat(self._uid) + + # start worker thread sync = mt.Event() - self._thread = mt.Thread(target=self._worker_thread, args=[sync]) + self._thread = mt.Thread(target=self._work_loop, args=[sync]) self._thread.daemon = True self._thread.start() @@ -479,7 +244,33 @@ def start(self): # -------------------------------------------------------------------------- # - def _worker_thread(self, sync): + def _hb_beat_cb(self): + ''' + publish own heartbeat in the registry and check owner's heartbeat + ''' + + tstamp = time.time() + self._reg['heartbeats.timestamps.%s' % self._uid] = tstamp + self._log.debug('=== hb_beat %s: put %.1f', self.uid, tstamp) + + tstamp = self._reg['heartbeats.timestamps.%s' % self._owner] + self._hb.beat(self._owner, timestamp=tstamp) + + + # -------------------------------------------------------------------------- + # + def _hb_term_cb(self, uid=None): + + self._log.debug('=== hb_term %s: %s died', self.uid, uid) + self._prof.prof('term', uid=self._uid) + + # cmgr is gone, no restart possible - terminate + return False + + + # -------------------------------------------------------------------------- + # + def _work_loop(self, sync): try: self._initialize() @@ -817,7 +608,7 @@ def get_input_ep(self, qname): cfg = self._reg['bridges'][qname] - self._log.debug('====== get input ep: %s', qname) + self._log.debug('get input ep: %s', qname) return ru.zmq.Getter(qname, url=cfg['get']) @@ -1074,7 +865,7 @@ def work_cb(self): # next input continue - # self._log.debug('work_cb ===== : %d', len(things)) + # 
self._log.debug('work_cb: %d', len(things)) # the worker target depends on the state of things, so we # need to sort the things into buckets by state before @@ -1253,7 +1044,7 @@ def advance(self, things, state=None, publish=True, push=False, qname=None, if _state in rps.FINAL: # things in final state are dropped for thing in _things: - self._log.debug('=== final %s [%s]', thing['uid'], _state) + # self._log.debug('final %s [%s]', thing['uid'], _state) self._prof.prof('drop', uid=thing['uid'], state=_state, ts=ts) continue @@ -1261,8 +1052,8 @@ def advance(self, things, state=None, publish=True, push=False, qname=None, if _state not in self._outputs: # unknown target state -- error for thing in _things: - self._log.debug("lost %s [%s] : %s", thing['uid'], - _state, self._outputs) + # self._log.debug("lost %s [%s] : %s", thing['uid'], + # _state, self._outputs) self._prof.prof('lost', uid=thing['uid'], state=_state, ts=ts) continue @@ -1270,7 +1061,7 @@ def advance(self, things, state=None, publish=True, push=False, qname=None, if not self._outputs[_state]: # empty output -- drop thing for thing in _things: - self._log.debug('=== drop %s [%s]', thing['uid'], _state) + # self._log.debug('drop %s [%s]', thing['uid'], _state) self._prof.prof('drop', uid=thing['uid'], state=_state, ts=ts) continue @@ -1278,7 +1069,7 @@ def advance(self, things, state=None, publish=True, push=False, qname=None, output = self._outputs[_state] # push the thing down the drain - self._log.debug('=== put bulk %s: %s: %s', _state, len(_things), + self._log.debug('put bulk %s: %s: %s', _state, len(_things), output.channel) output.put(_things, qname=qname) From 99f82dd22c274fb2c9007fad0f15e4410892f3cb Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 5 Jun 2023 18:39:22 +0200 Subject: [PATCH 058/171] fix heartbeat management --- bin/radical-pilot-bridge | 39 ++++---- bin/radical-pilot-component | 29 +++--- bin/radical-pilot-worker | 4 +- examples/00_getting_started.py | 1 - src/radical/pilot/agent/agent_0.py | 2 +- src/radical/pilot/proxy.py | 12 +-- src/radical/pilot/session.py | 143 +++++++++++++++------------ src/radical/pilot/utils/component.py | 12 ++- 8 files changed, 133 insertions(+), 109 deletions(-) diff --git a/bin/radical-pilot-bridge b/bin/radical-pilot-bridge index f32eb2eab1..0417055462 100755 --- a/bin/radical-pilot-bridge +++ b/bin/radical-pilot-bridge @@ -30,8 +30,8 @@ def main(sid, reg_addr, uid): { 'from' : 'uid', - 'pub' : 'addr_pub', - 'sub' : 'addr_sub', + 'addr_pub': 'addr_pub', + 'addr_sub': 'addr_sub', 'interval': , 'timeout' : } @@ -55,13 +55,13 @@ def main(sid, reg_addr, uid): pubsub and queue type bridges): { - 'pub': '$addr_pub', - 'sub': '$addr_sub' + 'addr_pub': '$addr_pub', + 'addr_sub': '$addr_sub' } { - 'put': '$addr_put', - 'get': '$addr_get' + 'addr_put': '$addr_put', + 'addr_get': '$addr_get' } That config is formed so that any publishers, subscribers, putters or getters @@ -97,21 +97,22 @@ def wrapped_main(sid, reg_addr, uid, log, prof): term = mt.Event() reg = ru.zmq.RegistryClient(url=reg_addr) - scfg = ru.TypedDict(reg['cfg']) - bcfg = ru.TypedDict(reg['bridges.%s.cfg' % uid]) + + hb_cfg = ru.TypedDict(reg['heartbeat']) + b_cfg = ru.TypedDict(reg['bridges.%s.cfg' % uid]) spt.setproctitle('rp.%s' % uid) # create the bridge, store connection addresses in FS, and begin to work - bridge = ru.zmq.Bridge.create(uid, cfg=bcfg) + bridge = ru.zmq.Bridge.create(uid, cfg=b_cfg) - reg['bridges.%s.%s' % (uid, bridge.type_in )] = str(bridge.addr_in) - reg['bridges.%s.%s' % (uid, 
bridge.type_out)] = str(bridge.addr_out) + reg['bridges.%s.addr_%s' % (uid, bridge.type_in )] = str(bridge.addr_in) + reg['bridges.%s.addr_%s' % (uid, bridge.type_out)] = str(bridge.addr_out) bridge.start() # bridge runs - send heartbeats so that cmgr knows about it - hb_pub = ru.zmq.Publisher('heartbeat', scfg.heartbeat.addr_pub, + hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub, log=log, prof=prof) def hb_beat_cb(): @@ -122,23 +123,23 @@ def wrapped_main(sid, reg_addr, uid, log, prof): term.set() return None - hb = ru.Heartbeat(uid=bcfg.uid, - timeout=scfg.heartbeat.timeout, - interval=scfg.heartbeat.interval, + hb = ru.Heartbeat(uid=b_cfg.uid, + timeout=hb_cfg.timeout, + interval=hb_cfg.interval, beat_cb=hb_beat_cb, term_cb=hb_term_cb, log=log) hb.start() # register cmgr heartbeat by beating once - hb.beat(uid=bcfg.cmgr) + hb.beat(uid=b_cfg.cmgr) # react on cmgr heartbeats def hb_sub_cb(topic, msg): - if msg['uid'] == bcfg.cmgr: - hb.beat(uid=bcfg.cmgr) + if msg['uid'] == b_cfg.cmgr: + hb.beat(uid=b_cfg.cmgr) - ru.zmq.Subscriber('heartbeat', scfg.heartbeat.addr_sub, + ru.zmq.Subscriber('heartbeat', hb_cfg.addr_sub, topic='heartbeat', cb=hb_sub_cb, log=log, prof=prof) diff --git a/bin/radical-pilot-component b/bin/radical-pilot-component index 2e91dc762e..77d324b586 100755 --- a/bin/radical-pilot-component +++ b/bin/radical-pilot-component @@ -33,8 +33,8 @@ def main(sid, reg_addr, uid): { 'from' : 'uid', - 'pub' : 'addr_pub', - 'sub' : 'addr_sub', + 'addr_pub': 'addr_pub', + 'addr_sub': 'addr_sub', 'interval': , 'timeout' : } @@ -74,21 +74,22 @@ def wrapped_main(sid, reg_addr, uid, log, prof): term = mt.Event() reg = ru.zmq.RegistryClient(url=reg_addr) - scfg = ru.TypedDict(reg['cfg']) - ccfg = ru.TypedDict(reg['components.%s.cfg' % uid]) + + hb_cfg = ru.TypedDict(reg['heartbeat']) + c_cfg = ru.TypedDict(reg['components.%s.cfg' % uid]) spt.setproctitle('rp.%s' % uid) # start a non-primary session - session = rp.Session(uid=sid, cfg=ccfg, + session = rp.Session(uid=sid, cfg=c_cfg, _role=rp.Session._DEFAULT, _reg_addr=reg_addr) # create the component and begin to work - comp = rp.utils.Component.create(ccfg, session) + comp = rp.utils.Component.create(c_cfg, session) comp.start() # component runs - send heartbeats so that cmgr knows about it - hb_pub = ru.zmq.Publisher('heartbeat', scfg.heartbeat.addr_pub) + hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub) # log=session._log, prof=session._prof) def hb_beat_cb(): @@ -99,23 +100,23 @@ def wrapped_main(sid, reg_addr, uid, log, prof): term.set() return None - hb = ru.Heartbeat(uid=ccfg.uid, - timeout=scfg.heartbeat.timeout, - interval=scfg.heartbeat.interval, + hb = ru.Heartbeat(uid=c_cfg.uid, + timeout=hb_cfg.timeout, + interval=hb_cfg.interval, beat_cb=hb_beat_cb, term_cb=hb_term_cb, log=log) hb.start() # register cmgr heartbeat by beating once - hb.beat(uid=ccfg.cmgr) + hb.beat(uid=c_cfg.cmgr) # record cmgr heartbeats def hb_sub_cb(topic, msg): - if msg['uid'] == ccfg.cmgr: - hb.beat(uid=ccfg.cmgr) + if msg['uid'] == c_cfg.cmgr: + hb.beat(uid=c_cfg.cmgr) - ru.zmq.Subscriber('heartbeat', scfg.heartbeat.addr_sub, + ru.zmq.Subscriber('heartbeat', hb_cfg.addr_sub, topic='heartbeat', cb=hb_sub_cb, log=log, prof=prof) diff --git a/bin/radical-pilot-worker b/bin/radical-pilot-worker index b16c2053e1..e3d1ea0289 100755 --- a/bin/radical-pilot-worker +++ b/bin/radical-pilot-worker @@ -29,8 +29,8 @@ def main(cfg): { 'from' : 'uid', - 'pub' : 'addr_pub', - 'sub' : 'addr_sub', + 'addr_pub': 'addr_pub', + 'addr_sub': 'addr_sub', 
'interval': , 'timeout' : } diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index 278afdcd1f..d28281590d 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -4,7 +4,6 @@ __license__ = 'MIT' import os -from re import A import sys import random diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 72fd4e84f5..4a8c877584 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -117,7 +117,7 @@ def __init__(self, cfg: ru.Config): self.register_timed_cb(self._check_lifetime, timer=10) # as long as we are alive, we also want to keep the proxy alive - self._session._run_proxy_hb() + # self._session._run_proxy_hb() # all set up - connect to proxy to fetch / push tasks self._connect_proxy() diff --git a/src/radical/pilot/proxy.py b/src/radical/pilot/proxy.py index 97630db8a0..b3784f7432 100644 --- a/src/radical/pilot/proxy.py +++ b/src/radical/pilot/proxy.py @@ -260,12 +260,12 @@ def _worker(self, sid, q, term): proxy_sp.start() proxy_aq.start() - cfg = {'proxy_control_pubsub': {'pub': str(proxy_cp.addr_pub), - 'sub': str(proxy_cp.addr_sub)}, - 'proxy_state_pubsub' : {'pub': str(proxy_sp.addr_pub), - 'sub': str(proxy_sp.addr_sub)}, - 'proxy_task_queue' : {'put': str(proxy_aq.addr_put), - 'get': str(proxy_aq.addr_get)}} + cfg = {'proxy_control_pubsub': {'addr_pub': str(proxy_cp.addr_pub), + 'addr_sub': str(proxy_cp.addr_sub)}, + 'proxy_state_pubsub' : {'addr_pub': str(proxy_sp.addr_pub), + 'addr_sub': str(proxy_sp.addr_sub)}, + 'proxy_task_queue' : {'addr_put': str(proxy_aq.addr_put), + 'addr_get': str(proxy_aq.addr_get)}} # inform service about endpoint details q.put(cfg) diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 5afb5d9618..ab6bfe0eeb 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -255,11 +255,14 @@ def _init_primary(self): # only primary sessions start and initialize the proxy service self._start_proxy() + # start heartbeat channel + self._start_heartbeat() + # push the session config into the registry self._publish_cfg() # start bridges and components - self._init_components() + self._start_components() # -------------------------------------------------------------------------- @@ -282,9 +285,10 @@ def _init_agent_0(self): self._init_cfg_from_dict() self._start_registry() self._connect_registry() + self._start_heartbeat() self._publish_cfg() self._connect_proxy() - self._init_components() + self._start_components() # -------------------------------------------------------------------------- @@ -299,7 +303,7 @@ def _init_agent_n(self): self._connect_registry() self._init_cfg_from_registry() - self._init_components() + self._start_components() # -------------------------------------------------------------------------- @@ -488,6 +492,70 @@ def _init_cfg_from_registry(self): self._prof.prof('session_start', uid=self._uid) + # -------------------------------------------------------------------------- + # + def _start_heartbeat(self): + + # only primary and agent_0 sessions manage heartbeats + assert self._role in [self._PRIMARY, self._AGENT_0] + + # start the embedded heartbeat pubsub bridge + self._hb_pubsub = ru.zmq.PubSub('heartbeat_pubsub', + cfg={'uid' : 'heartbeat_pubsub', + 'type' : 'pubsub', + 'log_lvl': 'debug', + 'path' : self._cfg.path}) + self._hb_pubsub.start() + time.sleep(1) + + # fill 'cfg.heartbeat' section + self._cfg.heartbeat.addr_pub = str(self._hb_pubsub.addr_pub) + 
self._cfg.heartbeat.addr_sub = str(self._hb_pubsub.addr_sub) + + # create a publisher for that channel to publish own heartbeat + self._hb_pub = ru.zmq.Publisher(channel='heartbeat_pubsub', + url=self._cfg.heartbeat.addr_pub, + log=self._log, + prof=self._prof) + + + # start the heartbeat monitor, but first define its callbacks + def _hb_beat_cb(): + # called on every heartbeat: cfg.heartbeat.interval` + # publish own heartbeat + self._hb_pub.put('heartbeat', + {'cmd' : 'heartbeat', + 'args': {'uid': self._uid}}) + # also update proxy heartbeat + self._proxy.request('heartbeat', {'sid': self._uid}) + + def _hb_term_cb(): + # called when some entity misses heartbeats: `cfg.heartbeat.timeout` + if self._cmgr: + self._cmgr.close() + return False + + # create heartbeat manager which monitors all components in this session + self._hb = ru.Heartbeat(uid=self._uid, + timeout=self._cfg.heartbeat.timeout, + interval=self._cfg.heartbeat.interval, + beat_cb=_hb_beat_cb, + term_cb=_hb_term_cb, + log=self._log) + + # subscribe to heartbeat messages on the pubsub + def _hb_msg_cb(topic, msg): + # inform the heartbeat manager about every received heartbeat + self._hb.beat(msg['uid']) + + ru.zmq.Subscriber(channel='heartbeat_pubsub', + topic='heartbeat', + url=self._cfg.heartbeat.addr_sub, + cb=_hb_msg_cb, + log=self._log, + prof=self._prof) + + # -------------------------------------------------------------------------- # def _publish_cfg(self): @@ -500,10 +568,12 @@ def _publish_cfg(self): # push proxy, bridges, components and heartbeat subsections separately flat_cfg = copy.deepcopy(self._cfg) + del flat_cfg['heartbeat'] del flat_cfg['bridges'] del flat_cfg['components'] self._reg['cfg'] = flat_cfg + self._reg['heartbeat'] = self._cfg.heartbeat self._reg['bridges'] = self._cfg.bridges # proxy bridges self._reg['components'] = {} @@ -583,7 +653,7 @@ def _connect_proxy(self): # -------------------------------------------------------------------------- # - def _init_components(self): + def _start_components(self): assert self._role in [self._PRIMARY, self._AGENT_0, self._AGENT_N] @@ -605,9 +675,6 @@ def _init_components(self): self._reg.dump('%s.bridges' % self._role) - # make sure we send heartbeats to the proxy - self._run_proxy_hb() - # forward any control messages to the proxy def fwd_control(topic, msg): self._log.debug('fwd control %s: %s', topic, msg) @@ -618,12 +685,12 @@ def fwd_control(topic, msg): self._proxy_ctrl_pub = ru.zmq.Publisher( channel=rpc.PROXY_CONTROL_PUBSUB, - url=self._reg['bridges.proxy_control_pubsub.pub'], + url=self._reg['bridges.proxy_control_pubsub.addr_pub'], path=self._cfg.path) self._ctrl_sub = ru.zmq.Subscriber( channel=rpc.CONTROL_PUBSUB, - url=self._reg['bridges.control_pubsub.sub'], + url=self._reg['bridges.control_pubsub.addr_sub'], path=self._cfg.path) self._ctrl_sub.subscribe(rpc.CONTROL_PUBSUB, fwd_control) @@ -635,12 +702,12 @@ def fwd_state(topic, msg): self._state_pub = ru.zmq.Publisher( channel=rpc.STATE_PUBSUB, - url=self._reg['bridges.state_pubsub.pub'], + url=self._reg['bridges.state_pubsub.addr_pub'], path=self._cfg.path) self._proxy_state_sub = ru.zmq.Subscriber( channel=rpc.PROXY_STATE_PUBSUB, - url=self._reg['bridges.proxy_state_pubsub.sub'], + url=self._reg['bridges.proxy_state_pubsub.addr_sub'], path=self._cfg.path) self._proxy_state_sub.subscribe(rpc.PROXY_STATE_PUBSUB, fwd_state) @@ -733,42 +800,15 @@ def close(self, **kwargs): self._prof.prof("session_fetch_stop", uid=self._uid) if self._role == self._PRIMARY: + + # dump json + 
self._reg.dump('final') + self._t_stop = time.time() self._rep.info('<>ok\n') - # dump json - json = {'session' : self.as_dict(), - 'pmgr' : list(), - 'pilot' : list(), - 'tmgr' : list(), - 'task' : list()} - - # json['session']['_id'] = self.uid - json['session']['type'] = 'session' - json['session']['uid'] = self.uid - - for fname in glob.glob('%s/pmgr.*.json' % self.path): - json['pmgr'].append(ru.read_json(fname)) - - for fname in glob.glob('%s/pilot.*.json' % self.path): - json['pilot'].append(ru.read_json(fname)) - - for fname in glob.glob('%s/tmgr.*.json' % self.path): - json['tmgr'].append(ru.read_json(fname)) - - for fname in glob.glob('%s/tasks.*.json' % self.path): - json['task'] += ru.read_json(fname) - - tgt = '%s/%s.json' % (self.path, self.uid) - ru.write_json(json, tgt) - - if self._closed and self._created: - self._rep.info('<>ok\n') - # -------------------------------------------------------------------------- # @@ -790,25 +830,6 @@ def _run_proxy(self): proxy.wait() - # -------------------------------------------------------------------------- - # - def _run_proxy_hb(self): - - self._proxy_heartbeat_thread = mt.Thread(target=self._proxy_hb) - self._proxy_heartbeat_thread.daemon = True - self._proxy_heartbeat_thread.start() - - - # -------------------------------------------------------------------------- - # - def _proxy_hb(self): - - while True: - - self._proxy.request('heartbeat', {'sid': self._uid}) - time.sleep(20) - - # -------------------------------------------------------------------------- # def as_dict(self): diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index 6e947cc673..a8540a7311 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -609,7 +609,7 @@ def get_input_ep(self, qname): cfg = self._reg['bridges'][qname] self._log.debug('get input ep: %s', qname) - return ru.zmq.Getter(qname, url=cfg['get']) + return ru.zmq.Getter(qname, url=cfg['addr_get']) # -------------------------------------------------------------------------- @@ -621,7 +621,7 @@ def get_output_ep(self, qname): cfg = self._reg['bridges'][qname] - return ru.zmq.Putter(qname, url=cfg['put']) + return ru.zmq.Putter(qname, url=cfg['addr_put']) # -------------------------------------------------------------------------- @@ -783,10 +783,12 @@ def register_publisher(self, pubsub): ''' assert pubsub not in self._publishers + cfg = self._reg['bridges.%s' % pubsub] - cfg = self._reg['bridges'][pubsub] + import pprint + self._log.debug('===>> %s', pprint.pformat(cfg)) self._publishers[pubsub] = ru.zmq.Publisher(channel=pubsub, - url=cfg['pub'], + url=cfg['addr_pub'], log=self._log, prof=self._prof) @@ -816,7 +818,7 @@ def register_subscriber(self, pubsub, cb): if pubsub not in self._subscribers: self._subscribers[pubsub] = ru.zmq.Subscriber(channel=pubsub, - url=cfg['sub'], + url=cfg['addr_sub'], log=self._log, prof=self._prof) From 8ea4e203543bf7bb6cf00fe7bd282beb524e51eb Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Tue, 6 Jun 2023 22:38:48 +0200 Subject: [PATCH 059/171] cleanup --- examples/00_getting_started.py | 2 +- src/radical/pilot/agent/agent_0.py | 38 ---------------------------- src/radical/pilot/utils/component.py | 38 ---------------------------- 3 files changed, 1 insertion(+), 77 deletions(-) diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index d28281590d..86190780d7 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -66,7 +66,7 @@ pilot = 
pmgr.submit_pilots(pdesc) - n = 16 * 1024 # number of tasks to run + n = 1 * 1024 # number of tasks to run report.header('submit %d tasks' % n) # Register the pilot in a TaskManager object. diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 4a8c877584..95cd09e322 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -88,23 +88,6 @@ def __init__(self, cfg: ru.Config): self.register_subscriber(rpc.CONTROL_PUBSUB, self._check_control) self.register_subscriber(rpc.STATE_PUBSUB, self._service_state_cb) - # run our own slow-paced heartbeat monitor to watch pmgr heartbeats - # FIXME: we need to get pmgr freq - freq = 100 - tint = freq / 3 - tout = freq * 10 - self._hb = ru.Heartbeat(uid=self._uid, - timeout=tout, - interval=tint, - beat_cb=self._hb_check, # no own heartbeat(pmgr pulls) - term_cb=self._hb_term_cb, - log=self._log) - self._hb.start() - - # register pmgr heartbeat - self._log.info('hb init for %s', self._pmgr) - self._hb.beat(uid=self._pmgr) - # register the control callback self.register_subscriber(rpc.PROXY_CONTROL_PUBSUB, self._proxy_control_cb) @@ -116,30 +99,10 @@ def __init__(self, cfg: ru.Config): # regularly check for lifetime limit self.register_timed_cb(self._check_lifetime, timer=10) - # as long as we are alive, we also want to keep the proxy alive - # self._session._run_proxy_hb() - # all set up - connect to proxy to fetch / push tasks self._connect_proxy() - # -------------------------------------------------------------------------- - # - def _hb_check(self): - - self._log.debug('hb check') - - - # -------------------------------------------------------------------------- - # - def _hb_term_cb(self, msg=None): - - self._session.close() - self._log.warn('hb termination: %s', msg) - - return None - - # -------------------------------------------------------------------------- # def _connect_proxy(self): @@ -368,7 +331,6 @@ def finalize(self): # tear things down in reverse order self._rm.stop() - self._hb.stop() self._session.close() diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index a8540a7311..fd133724b3 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -214,18 +214,6 @@ def __init__(self, cfg, session): # def start(self): - # start heartbeat monitor - self._hb = ru.Heartbeat(uid=self.uid, - timeout=self._reg['cfg.heartbeat.timeout'], - interval=self._reg['cfg.heartbeat.interval'], - beat_cb=self._hb_beat_cb, # on every heartbeat - term_cb=self._hb_term_cb, # on termination - log=self._log) - - # heartbeat watches our own cmgr - self._log.debug('=== hb watch %s', self._owner) - self._hb.beat(self._uid) - # start worker thread sync = mt.Event() self._thread = mt.Thread(target=self._work_loop, args=[sync]) @@ -242,32 +230,6 @@ def start(self): assert self._thread.is_alive() - # -------------------------------------------------------------------------- - # - def _hb_beat_cb(self): - ''' - publish own heartbeat in the registry and check owner's heartbeat - ''' - - tstamp = time.time() - self._reg['heartbeats.timestamps.%s' % self._uid] = tstamp - self._log.debug('=== hb_beat %s: put %.1f', self.uid, tstamp) - - tstamp = self._reg['heartbeats.timestamps.%s' % self._owner] - self._hb.beat(self._owner, timestamp=tstamp) - - - # -------------------------------------------------------------------------- - # - def _hb_term_cb(self, uid=None): - - self._log.debug('=== hb_term %s: %s died', self.uid, uid) - 
self._prof.prof('term', uid=self._uid) - - # cmgr is gone, no restart possible - terminate - return False - - # -------------------------------------------------------------------------- # def _work_loop(self, sync): From ec37635ce2157fc750ed35b5c59f10c2488d7b55 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Tue, 6 Jun 2023 22:44:29 +0200 Subject: [PATCH 060/171] resolve conflict --- TODO | 4 ---- 1 file changed, 4 deletions(-) diff --git a/TODO b/TODO index d37ba83a07..746d054f3c 100644 --- a/TODO +++ b/TODO @@ -257,15 +257,11 @@ others - `PRTE` switches in scheduler should become `partition` switches, where the partitions are defined by the RM -<<<<<<< HEAD - stager and other RPC like workers should get a proper async RPC channel (req/res). That should be built upon `ru.zmq.Service`. The client side should wrap the request into a proper async Future. -======= - - create_named_env needs to issue a client error on failures ->>>>>>> devel TexaScale-5: From 47c57d8cd7c105b52fb644b3e7ab848c2d0b8d2b Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 7 Jun 2023 18:47:19 +0200 Subject: [PATCH 061/171] clean agent startup --- bin/plot_profile.ipynb | 234 +-- bin/radical-pilot-agent | 89 -- bin/radical-pilot-agent_n | 163 +++ bin/radical-pilot-bridge | 21 +- bin/radical-pilot-component | 23 +- bin/radical-pilot-prte2prof | 2 +- setup.py | 3 +- src/radical/pilot/agent/agent_0.py | 54 +- src/radical/pilot/agent/agent_n.py | 4 +- src/radical/pilot/agent/bootstrap_0.sh | 121 +- .../pilot/agent/launch_method/mpirun.py | 2 +- src/radical/pilot/configs/agent_debug_sa.json | 4 +- src/radical/pilot/configs/agent_default.json | 2 +- .../pilot/configs/agent_default_sa.json | 2 +- src/radical/pilot/configs/agent_rhea.json | 10 +- .../pilot/configs/agent_summit_sa.json | 4 +- src/radical/pilot/pmgr/launching/base.py | 11 +- src/radical/pilot/pmgr/launching/default.py | 1256 ----------------- src/radical/pilot/proxy.py | 12 +- src/radical/pilot/utils/component.py | 10 +- src/radical/pilot/utils/component_manager.py | 220 +++ src/radical/pilot/utils/prof_utils.py | 4 +- 22 files changed, 607 insertions(+), 1644 deletions(-) delete mode 100755 bin/radical-pilot-agent create mode 100755 bin/radical-pilot-agent_n delete mode 100644 src/radical/pilot/pmgr/launching/default.py create mode 100644 src/radical/pilot/utils/component_manager.py diff --git a/bin/plot_profile.ipynb b/bin/plot_profile.ipynb index 1f12684c79..ef04d2e299 100755 --- a/bin/plot_profile.ipynb +++ b/bin/plot_profile.ipynb @@ -89,7 +89,7 @@ "\n", "Number of pilots in session: 1\n", "Processing pilot 'pilot.0000'\n", - "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent.0.AgentExecutingComponent_SHELL.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." + "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent_0.AgentExecutingComponent_SHELL.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." ] }, { @@ -97,7 +97,7 @@ "stream": "stdout", "text": [ "\n", - "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent.0.AgentExecutingWatcher_SHELL.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." 
+ "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent_0.AgentExecutingWatcher_SHELL.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." ] }, { @@ -105,7 +105,7 @@ "stream": "stdout", "text": [ "\n", - "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent.0.AgentHeartbeatWorker.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." + "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent_0.AgentHeartbeatWorker.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." ] }, { @@ -113,7 +113,7 @@ "stream": "stdout", "text": [ "\n", - "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent.0.AgentStagingInputComponent.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." + "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent_0.AgentStagingInputComponent.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." ] }, { @@ -121,7 +121,7 @@ "stream": "stdout", "text": [ "\n", - "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent.0.AgentStagingOutputComponent.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." + "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent_0.AgentStagingOutputComponent.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." ] }, { @@ -129,7 +129,7 @@ "stream": "stdout", "text": [ "\n", - "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent.0.AgentUpdateWorker.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." + "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent_0.AgentUpdateWorker.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." ] }, { @@ -137,7 +137,7 @@ "stream": "stdout", "text": [ "\n", - "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent.0.AgentWorker.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." + "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent_0.AgentWorker.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." ] }, { @@ -145,7 +145,7 @@ "stream": "stdout", "text": [ "\n", - "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent.0.SchedulerContinuous.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." + "fetching 'file://localhost/home/merzky/radical.pilot.sandbox/rp.session.cameo.merzky.016673.0005-pilot.0000//agent_0.SchedulerContinuous.0.prof' to 'file://localhost/tmp//rp.session.cameo.merzky.016673.0005/'." 
] }, { @@ -552,15 +552,15 @@ "168 13.0622 New PendingInputStaging tmgr_adv_u_pend rp.session.cameo.merzky.016673.0005:MainThread\n", "169 13.2733 PendingInputStaging StagingInput usic_get_u rp.session.cameo.merzky.016673.0005:InputFileT...\n", "175 13.6307 StagingInput AgentStagingInputPending usic_adv_u_pend rp.session.cameo.merzky.016673.0005:InputFileT...\n", - "209 16.0061 AgentStagingInputPending AgentStagingInputPending awo_adv_u_pend agent.0.AgentWorker.0:MainThread\n", - "293 16.0258 AgentStagingInputPending AgentStagingInput asic_adv_u agent.0.AgentStagingInputComponent.0:MainThread\n", - "297 16.0261 AgentStagingInput AllocatingPending asic_adv_u_pend agent.0.AgentStagingInputComponent.0:MainThread\n", - "368 16.0342 AllocatingPending Allocating asc_adv_u agent.0.SchedulerContinuous.0:MainThread\n", - "374 16.0347 Allocating ExecutingPending asc_adv_u_pend agent.0.SchedulerContinuous.0:MainThread\n", - "553 16.5038 ExecutingPending Executing aec_adv_u agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", - "693 16.6241 Executing AgentStagingOutputPending aec_adv_u_pend agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", - "706 16.6323 AgentStagingOutputPending AgentStagingOutput asoc_adv_u agent.0.AgentStagingOutputComponent.0:MainThread\n", - "708 16.6326 AgentStagingOutput PendingOutputStaging asoc_adv_u_pend agent.0.AgentStagingOutputComponent.0:MainThread\n", + "209 16.0061 AgentStagingInputPending AgentStagingInputPending awo_adv_u_pend agent_0.AgentWorker.0:MainThread\n", + "293 16.0258 AgentStagingInputPending AgentStagingInput asic_adv_u agent_0.AgentStagingInputComponent.0:MainThread\n", + "297 16.0261 AgentStagingInput AllocatingPending asic_adv_u_pend agent_0.AgentStagingInputComponent.0:MainThread\n", + "368 16.0342 AllocatingPending Allocating asc_adv_u agent_0.SchedulerContinuous.0:MainThread\n", + "374 16.0347 Allocating ExecutingPending asc_adv_u_pend agent_0.SchedulerContinuous.0:MainThread\n", + "553 16.5038 ExecutingPending Executing aec_adv_u agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", + "693 16.6241 Executing AgentStagingOutputPending aec_adv_u_pend agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", + "706 16.6323 AgentStagingOutputPending AgentStagingOutput asoc_adv_u agent_0.AgentStagingOutputComponent.0:MainThread\n", + "708 16.6326 AgentStagingOutput PendingOutputStaging asoc_adv_u_pend agent_0.AgentStagingOutputComponent.0:MainThread\n", "1553 19.0440 PendingOutputStaging StagingOutput usoc_get_u rp.session.cameo.merzky.016673.0005:OutputFile...\n", "1688 19.2452 StagingOutput Done usoc_adv_u rp.session.cameo.merzky.016673.0005:OutputFile...\n" ] @@ -589,7 +589,7 @@ " 2167\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 20.5973\n", " task.000070\n", @@ -602,7 +602,7 @@ " 2186\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 20.7117\n", " task.000069\n", @@ -615,7 +615,7 @@ " 2193\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 20.7125\n", " task.000010\n", @@ -628,7 +628,7 @@ " 2215\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 20.8313\n", " task.000009\n", @@ -641,7 +641,7 @@ " 2222\n", " advance\n", 
" \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 20.8325\n", " task.000069\n", @@ -654,7 +654,7 @@ " 2362\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 20.9513\n", " task.000009\n", @@ -667,7 +667,7 @@ " 2398\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 21.0580\n", " task.000071\n", @@ -680,7 +680,7 @@ " 2470\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 21.1808\n", " task.000071\n", @@ -693,7 +693,7 @@ " 2475\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 21.1822\n", " task.000011\n", @@ -706,7 +706,7 @@ " 2500\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 21.2993\n", " task.000011\n", @@ -719,7 +719,7 @@ " 2538\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 21.7797\n", " task.000088\n", @@ -732,7 +732,7 @@ " 2632\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 21.8973\n", " task.000072\n", @@ -745,7 +745,7 @@ " 2638\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 21.8984\n", " task.000088\n", @@ -758,7 +758,7 @@ " 2769\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 22.0198\n", " task.000089\n", @@ -771,7 +771,7 @@ " 2776\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 22.0206\n", " task.000072\n", @@ -784,7 +784,7 @@ " 2802\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 22.1400\n", " task.000012\n", @@ -797,7 +797,7 @@ " 2809\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 22.1415\n", " task.000089\n", @@ -810,7 +810,7 @@ " 2840\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 22.2618\n", " task.000012\n", @@ -823,7 +823,7 @@ " 2871\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 22.8924\n", " task.000020\n", @@ -836,7 +836,7 @@ " 2922\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 23.0136\n", " task.000020\n", @@ -853,26 +853,26 @@ "prompt_number": 45, "text": [ " event msg name state time uid 
entity cloned state_from info\n", - "2167 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.5973 task.000070 task False ExecutingPending aec_adv_u_pend\n", - "2186 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 20.7117 task.000069 task False ExecutingPending aec_adv_u\n", - "2193 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.7125 task.000010 task False Executing aec_adv_u_pend\n", - "2215 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 20.8313 task.000009 task False ExecutingPending aec_adv_u\n", - "2222 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.8325 task.000069 task False Executing aec_adv_u_pend\n", - "2362 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.9513 task.000009 task False Executing aec_adv_u_pend\n", - "2398 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.0580 task.000071 task False AgentStagingInputPending aec_adv_u\n", - "2470 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.1808 task.000071 task False ExecutingPending aec_adv_u_pend\n", - "2475 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.1822 task.000011 task False ExecutingPending aec_adv_u\n", - "2500 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.2993 task.000011 task False Executing aec_adv_u_pend\n", - "2538 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.7797 task.000088 task False AgentStagingInputPending aec_adv_u\n", - "2632 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.8973 task.000072 task False ExecutingPending aec_adv_u\n", - "2638 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.8984 task.000088 task False ExecutingPending aec_adv_u_pend\n", - "2769 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 22.0198 task.000089 task False ExecutingPending aec_adv_u\n", - "2776 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 22.0206 task.000072 task False Executing aec_adv_u_pend\n", - "2802 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 22.1400 task.000012 task False ExecutingPending aec_adv_u\n", - "2809 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 22.1415 task.000089 task False Executing aec_adv_u_pend\n", - "2840 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 22.2618 task.000012 task False Executing aec_adv_u_pend\n", - "2871 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 22.8924 task.000020 task False AgentStagingInputPending aec_adv_u\n", - "2922 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 23.0136 task.000020 task False ExecutingPending aec_adv_u_pend" + "2167 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.5973 task.000070 task False ExecutingPending aec_adv_u_pend\n", + "2186 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 20.7117 task.000069 task False ExecutingPending aec_adv_u\n", + "2193 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.7125 task.000010 task False Executing aec_adv_u_pend\n", + "2215 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 20.8313 task.000009 task False ExecutingPending aec_adv_u\n", + "2222 advance 
agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.8325 task.000069 task False Executing aec_adv_u_pend\n", + "2362 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.9513 task.000009 task False Executing aec_adv_u_pend\n", + "2398 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.0580 task.000071 task False AgentStagingInputPending aec_adv_u\n", + "2470 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.1808 task.000071 task False ExecutingPending aec_adv_u_pend\n", + "2475 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.1822 task.000011 task False ExecutingPending aec_adv_u\n", + "2500 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.2993 task.000011 task False Executing aec_adv_u_pend\n", + "2538 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.7797 task.000088 task False AgentStagingInputPending aec_adv_u\n", + "2632 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.8973 task.000072 task False ExecutingPending aec_adv_u\n", + "2638 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.8984 task.000088 task False ExecutingPending aec_adv_u_pend\n", + "2769 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 22.0198 task.000089 task False ExecutingPending aec_adv_u\n", + "2776 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 22.0206 task.000072 task False Executing aec_adv_u_pend\n", + "2802 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 22.1400 task.000012 task False ExecutingPending aec_adv_u\n", + "2809 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 22.1415 task.000089 task False Executing aec_adv_u_pend\n", + "2840 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 22.2618 task.000012 task False Executing aec_adv_u_pend\n", + "2871 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 22.8924 task.000020 task False AgentStagingInputPending aec_adv_u\n", + "2922 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 23.0136 task.000020 task False ExecutingPending aec_adv_u_pend" ] } ], @@ -913,7 +913,7 @@ " 2167\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 20.5973\n", " task.000070\n", @@ -926,7 +926,7 @@ " 2186\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 20.7117\n", " task.000069\n", @@ -939,7 +939,7 @@ " 2193\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 20.7125\n", " task.000010\n", @@ -952,7 +952,7 @@ " 2215\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 20.8313\n", " task.000009\n", @@ -965,7 +965,7 @@ " 2222\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 20.8325\n", " task.000069\n", @@ -978,7 +978,7 @@ " 2362\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " 
agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 20.9513\n", " task.000009\n", @@ -991,7 +991,7 @@ " 2398\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 21.0580\n", " task.000071\n", @@ -1004,7 +1004,7 @@ " 2470\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 21.1808\n", " task.000071\n", @@ -1017,7 +1017,7 @@ " 2475\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 21.1822\n", " task.000011\n", @@ -1030,7 +1030,7 @@ " 2500\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 21.2993\n", " task.000011\n", @@ -1047,16 +1047,16 @@ "prompt_number": 48, "text": [ " event msg name state time uid entity cloned state_from info\n", - "2167 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.5973 task.000070 task False ExecutingPending aec_adv_u_pend\n", - "2186 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 20.7117 task.000069 task False ExecutingPending aec_adv_u\n", - "2193 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.7125 task.000010 task False Executing aec_adv_u_pend\n", - "2215 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 20.8313 task.000009 task False ExecutingPending aec_adv_u\n", - "2222 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.8325 task.000069 task False Executing aec_adv_u_pend\n", - "2362 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.9513 task.000009 task False Executing aec_adv_u_pend\n", - "2398 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.0580 task.000071 task False AgentStagingInputPending aec_adv_u\n", - "2470 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.1808 task.000071 task False ExecutingPending aec_adv_u_pend\n", - "2475 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.1822 task.000011 task False ExecutingPending aec_adv_u\n", - "2500 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.2993 task.000011 task False Executing aec_adv_u_pend" + "2167 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.5973 task.000070 task False ExecutingPending aec_adv_u_pend\n", + "2186 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 20.7117 task.000069 task False ExecutingPending aec_adv_u\n", + "2193 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.7125 task.000010 task False Executing aec_adv_u_pend\n", + "2215 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 20.8313 task.000009 task False ExecutingPending aec_adv_u\n", + "2222 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.8325 task.000069 task False Executing aec_adv_u_pend\n", + "2362 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.9513 task.000009 task False Executing aec_adv_u_pend\n", + "2398 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.0580 task.000071 task False 
AgentStagingInputPending aec_adv_u\n", + "2470 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.1808 task.000071 task False ExecutingPending aec_adv_u_pend\n", + "2475 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.1822 task.000011 task False ExecutingPending aec_adv_u\n", + "2500 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.2993 task.000011 task False Executing aec_adv_u_pend" ] } ], @@ -1113,7 +1113,7 @@ " 2167\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 20.5973\n", " task.000070\n", @@ -1127,7 +1127,7 @@ " 2186\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 20.7117\n", " task.000069\n", @@ -1141,7 +1141,7 @@ " 2193\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 20.7125\n", " task.000010\n", @@ -1155,7 +1155,7 @@ " 2215\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 20.8313\n", " task.000009\n", @@ -1169,7 +1169,7 @@ " 2222\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 20.8325\n", " task.000069\n", @@ -1183,7 +1183,7 @@ " 2362\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 20.9513\n", " task.000009\n", @@ -1197,7 +1197,7 @@ " 2398\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 21.0580\n", " task.000071\n", @@ -1211,7 +1211,7 @@ " 2470\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 21.1808\n", " task.000071\n", @@ -1225,7 +1225,7 @@ " 2475\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 21.1822\n", " task.000011\n", @@ -1239,7 +1239,7 @@ " 2500\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 21.2993\n", " task.000011\n", @@ -1253,7 +1253,7 @@ " 2538\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 21.7797\n", " task.000088\n", @@ -1267,7 +1267,7 @@ " 2632\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 21.8973\n", " task.000072\n", @@ -1281,7 +1281,7 @@ " 2638\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 21.8984\n", " task.000088\n", @@ -1295,7 +1295,7 @@ " 2769\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 
22.0198\n", " task.000089\n", @@ -1309,7 +1309,7 @@ " 2776\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 22.0206\n", " task.000072\n", @@ -1323,7 +1323,7 @@ " 2802\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 22.1400\n", " task.000012\n", @@ -1337,7 +1337,7 @@ " 2809\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 22.1415\n", " task.000089\n", @@ -1351,7 +1351,7 @@ " 2840\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 22.2618\n", " task.000012\n", @@ -1365,7 +1365,7 @@ " 2871\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:MainThread\n", + " agent_0.AgentExecutingWatcher_SHELL.0:MainThread\n", " Executing\n", " 22.8924\n", " task.000020\n", @@ -1379,7 +1379,7 @@ " 2922\n", " advance\n", " \n", - " agent.0.AgentExecutingWatcher_SHELL.0:Watcher\n", + " agent_0.AgentExecutingWatcher_SHELL.0:Watcher\n", " AgentStagingOutputPending\n", " 23.0136\n", " task.000020\n", @@ -1397,26 +1397,26 @@ "prompt_number": 52, "text": [ " event msg name state time uid entity cloned state_from info cc_exe\n", - "2167 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.5973 task.000070 task False ExecutingPending aec_adv_u_pend 1\n", - "2186 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 20.7117 task.000069 task False ExecutingPending aec_adv_u 2\n", - "2193 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.7125 task.000010 task False Executing aec_adv_u_pend 1\n", - "2215 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 20.8313 task.000009 task False ExecutingPending aec_adv_u 2\n", - "2222 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.8325 task.000069 task False Executing aec_adv_u_pend 1\n", - "2362 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.9513 task.000009 task False Executing aec_adv_u_pend 0\n", - "2398 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.0580 task.000071 task False AgentStagingInputPending aec_adv_u 1\n", - "2470 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.1808 task.000071 task False ExecutingPending aec_adv_u_pend 0\n", - "2475 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.1822 task.000011 task False ExecutingPending aec_adv_u 1\n", - "2500 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.2993 task.000011 task False Executing aec_adv_u_pend 0\n", - "2538 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.7797 task.000088 task False AgentStagingInputPending aec_adv_u 1\n", - "2632 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.8973 task.000072 task False ExecutingPending aec_adv_u 2\n", - "2638 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.8984 task.000088 task False ExecutingPending aec_adv_u_pend 1\n", - "2769 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 22.0198 task.000089 task False ExecutingPending aec_adv_u 2\n", 
- "2776 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 22.0206 task.000072 task False Executing aec_adv_u_pend 1\n", - "2802 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 22.1400 task.000012 task False ExecutingPending aec_adv_u 2\n", - "2809 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 22.1415 task.000089 task False Executing aec_adv_u_pend 1\n", - "2840 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 22.2618 task.000012 task False Executing aec_adv_u_pend 0\n", - "2871 advance agent.0.AgentExecutingWatcher_SHELL.0:MainThread Executing 22.8924 task.000020 task False AgentStagingInputPending aec_adv_u 1\n", - "2922 advance agent.0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 23.0136 task.000020 task False ExecutingPending aec_adv_u_pend 0" + "2167 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.5973 task.000070 task False ExecutingPending aec_adv_u_pend 1\n", + "2186 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 20.7117 task.000069 task False ExecutingPending aec_adv_u 2\n", + "2193 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.7125 task.000010 task False Executing aec_adv_u_pend 1\n", + "2215 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 20.8313 task.000009 task False ExecutingPending aec_adv_u 2\n", + "2222 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.8325 task.000069 task False Executing aec_adv_u_pend 1\n", + "2362 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 20.9513 task.000009 task False Executing aec_adv_u_pend 0\n", + "2398 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.0580 task.000071 task False AgentStagingInputPending aec_adv_u 1\n", + "2470 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.1808 task.000071 task False ExecutingPending aec_adv_u_pend 0\n", + "2475 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.1822 task.000011 task False ExecutingPending aec_adv_u 1\n", + "2500 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.2993 task.000011 task False Executing aec_adv_u_pend 0\n", + "2538 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.7797 task.000088 task False AgentStagingInputPending aec_adv_u 1\n", + "2632 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 21.8973 task.000072 task False ExecutingPending aec_adv_u 2\n", + "2638 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 21.8984 task.000088 task False ExecutingPending aec_adv_u_pend 1\n", + "2769 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 22.0198 task.000089 task False ExecutingPending aec_adv_u 2\n", + "2776 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 22.0206 task.000072 task False Executing aec_adv_u_pend 1\n", + "2802 advance agent_0.AgentExecutingWatcher_SHELL.0:MainThread Executing 22.1400 task.000012 task False ExecutingPending aec_adv_u 2\n", + "2809 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 22.1415 task.000089 task False Executing aec_adv_u_pend 1\n", + "2840 advance agent_0.AgentExecutingWatcher_SHELL.0:Watcher AgentStagingOutputPending 22.2618 task.000012 task False Executing aec_adv_u_pend 0\n", + "2871 advance 
agent_0.AgentExecutingWatcher_SHELL.0:MainThread                  Executing  22.8924  task.000020    task   False  AgentStagingInputPending       aec_adv_u       1\n",
+       "2922     advance       agent_0.AgentExecutingWatcher_SHELL.0:Watcher  AgentStagingOutputPending  23.0136  task.000020    task   False          ExecutingPending  aec_adv_u_pend       0"
       ]
      }
     ],
diff --git a/bin/radical-pilot-agent b/bin/radical-pilot-agent
deleted file mode 100755
index 87ebce152b..0000000000
--- a/bin/radical-pilot-agent
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/usr/bin/env python3
-
-__copyright__ = "Copyright 2014-2016, http://radical.rutgers.edu"
-__license__ = "MIT"
-
-
-import os
-import sys
-import time
-import setproctitle
-
-import radical.utils as ru
-import radical.pilot as rp
-
-
-# ------------------------------------------------------------------------------
-#
-def bootstrap_3(aid):
-    """
-    This method continues where the bootstrap_0/1/2 left off, and will now pass
-    control to the Agent class which will spawn the functional components.
-    Before doing so, we will check if we happen to be agent instance zero.  If
-    that is the case, some additional python level bootstrap routines kick in,
-    to set the stage for component and sub-agent spawning.
-
-    The agent interprets a config file, which will specify in an 'agents'
-    section:
-      - what nodes should be used for sub-agent startup
-      - what bridges should be started
-      - what are the endpoints for bridges which are not started
-      - what components should be started
-    agent.0 will create derived config files for all sub-agents.
-    """
-
-    print("bootstrap agent %s" % aid)
-
-    agent = None
-    reg_service = None
-
-    try:
-        setproctitle.setproctitle('rp.%s' % aid)
-
-        cfg = ru.Config(path='%s.cfg' % aid)
-
-        cfg.uid = aid
-        cfg.aid = aid    # used by executor
-
-        if aid == 'agent.0':
-            agent = rp.Agent_0(cfg)
-        else:
-            agent = rp.Agent_n(cfg)
-
-        agent.start()
-
-        # wait until the agent finishes or fails.
-        while True:
-            time.sleep(0.1)
-
-    except:
-        print('failed %s' % aid)
-        ru.print_exception_trace()
-
-    finally:
-        # in all cases, make sure we perform an orderly shutdown.  I hope python
-        # does not mind doing all those things in a finally clause of
-        # (essentially) main...
-        print('finalize %s' % aid)
-
-        if reg_service:
-            reg_service.stop()
-
-        if agent:
-            agent.stop()
-            print('stopped %s' % aid)
-
-
-# ------------------------------------------------------------------------------
-#
-if __name__ == "__main__":
-
-    # FIXME: daemonization a'la component
-
-    if len(sys.argv) != 2:
-        raise RuntimeError('missing parameter: agent id')
-
-    bootstrap_3(sys.argv[1])
-
-
-# ------------------------------------------------------------------------------
diff --git a/bin/radical-pilot-agent_n b/bin/radical-pilot-agent_n
new file mode 100755
index 0000000000..f0978724f6
--- /dev/null
+++ b/bin/radical-pilot-agent_n
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+
+__copyright__ = "Copyright 2014-2019, http://radical.rutgers.edu"
+__license__   = "MIT"
+
+
+import os
+import sys
+import time
+
+import threading    as mt
+import setproctitle as spt
+
+import radical.utils as ru
+import radical.pilot as rp
+
+
+# ------------------------------------------------------------------------------
+#
+def main(sid, reg_addr, uid):
+    '''
+    This method continues where the bootstrap_0/1/2 left off, and will now pass
+    control to the Agent class which will spawn the functional components.
+    Before doing so, we will check if we happen to be agent instance zero.  If
+    that is the case, some additional python level bootstrap routines kick in,
+    to set the stage for component and sub-agent spawning.
+
+    The agent interprets a config file, which will specify in an 'agents'
+    section:
+      - what nodes should be used for sub-agent startup
+      - what bridges should be started
+      - what are the endpoints for bridges which are not started
+      - what components should be started
+    agent_0 will create derived config files for all sub-agents.
+    '''
+
+    # basic setup: logger and profiler
+    log  = ru.Logger(name=uid, ns='radical.pilot', path=os.getcwd())
+    prof = ru.Profiler(name=uid, ns='radical.pilot', path=os.getcwd())
+
+    try:
+        prof.prof('comp_start', uid=uid)
+        prof.disable()
+
+        # `wrapped_main` dispatches on the agent uid
+        wrapped_main(sid, reg_addr, uid, log, prof)
+
+    finally:
+        prof.enable()
+        prof.prof('comp_stop', uid=uid)
+
+
+# ------------------------------------------------------------------------------
+#
+def wrapped_main(sid, reg_addr, uid, log, prof):
+
+    spt.setproctitle('rp.%s' % uid)
+
+    term = mt.Event()
+
+    # the heartbeat configuration lives in the session registry
+    reg    = ru.zmq.RegistryClient(url=reg_addr)
+    hb_cfg = ru.TypedDict(reg['heartbeat'])
+    reg.close()
+
+    if uid == 'agent_0':
+        agent = run_agent_0(sid, reg_addr, uid, log, prof)
+    else:
+        agent = run_agent_n(sid, reg_addr, uid, log, prof)
+
+    agent.start()
+
+    # agent runs - send heartbeats so that session knows about it
+    hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub, log=log, prof=prof)
+
+    def hb_beat_cb():
+        hb_pub.put('heartbeat', msg={'uid': uid})
+
+    def hb_term_cb(hb_uid):
+        agent.stop()
+        term.set()
+        return None
+
+    hb = ru.Heartbeat(uid=uid,
+                      timeout=hb_cfg.timeout,
+                      interval=hb_cfg.interval,
+                      beat_cb=hb_beat_cb,
+                      term_cb=hb_term_cb,
+                      log=log)
+    hb.start()
+
+    # register session heartbeat by beating once
+    hb.beat(uid=sid)
+
+    # react on session heartbeats
+    def hb_sub_cb(topic, msg):
+        if msg['uid'] == sid:
+            hb.beat(uid=sid)
+
+    ru.zmq.Subscriber('heartbeat', hb_cfg.addr_sub,
+                      topic='heartbeat', cb=hb_sub_cb,
+                      log=log, prof=prof)
+
+    # all is set up - we can sit idle 'til end of time.
+    while not term.is_set():
+        time.sleep(1)
+
+
+# ------------------------------------------------------------------------------
+#
+def run_agent_0(sid, reg_addr, uid, log, prof):
+
+    # the agent config was staged into the pilot sandbox as `agent_0.cfg`
+    a_cfg = ru.Config(path='./agent_0.cfg')
+
+    session = rp.Session(uid=sid, cfg=a_cfg,
+                         _role=rp.Session._AGENT_0, _reg_addr=reg_addr)
+
+    # session just started a registry - populate it further with agent related
+    # config sections
+    reg = ru.zmq.RegistryClient(url=reg_addr)
+
+    reg['agent.%s.cfg' % uid] = a_cfg
+
+    reg.close()
+
+    agent = rp.Agent_0(a_cfg, session)
+
+    return agent
+
+
+# ------------------------------------------------------------------------------
+#
+def run_agent_n(sid, reg_addr, uid, log, prof):
+
+    reg = ru.zmq.RegistryClient(url=reg_addr)
+
+    a_cfg = ru.TypedDict(reg['agent.%s.cfg' % uid])
+
+    reg.close()
+
+    session = rp.Session(uid=sid, cfg=a_cfg,
+                         _role=rp.Session._AGENT_N, _reg_addr=reg_addr)
+    agent   = rp.Agent_n(a_cfg, session)
+
+    return agent
+
+
+# ------------------------------------------------------------------------------
+#
+if __name__ == "__main__":
+
+    if len(sys.argv) != 4:
+        sys.stderr.write('error: invalid arguments\n'
+                         'usage: %s <sid> <reg_addr> <uid>\n' % sys.argv[0])
+        raise RuntimeError('invalid arguments: %s' % sys.argv)
+
+    sid      = sys.argv[1]
+    reg_addr = sys.argv[2]
+    uid      = sys.argv[3]
+
+    ru.daemonize(main=main, args=[sid, reg_addr, uid],
+                 stdout='%s.out' % uid, stderr='%s.err' % uid)
+    sys.exit(0)
+
+
+# ------------------------------------------------------------------------------
+
diff --git a/bin/radical-pilot-bridge b/bin/radical-pilot-bridge
index 0417055462..a170f09b2b 100755
--- a/bin/radical-pilot-bridge
+++ b/bin/radical-pilot-bridge
@@ -95,25 +95,26 @@ def main(sid, reg_addr, uid):
 #
 def wrapped_main(sid, reg_addr, uid, log, prof):
 
+    spt.setproctitle('rp.%s' % uid)
+
     term = mt.Event()
     reg  = ru.zmq.RegistryClient(url=reg_addr)
 
     hb_cfg = ru.TypedDict(reg['heartbeat'])
     b_cfg  = ru.TypedDict(reg['bridges.%s.cfg' % uid])
 
-    spt.setproctitle('rp.%s' % uid)
-
-    # create the bridge, store connection addresses in FS, and begin to work
+    # create the instance and begin to work
     bridge = ru.zmq.Bridge.create(uid, cfg=b_cfg)
 
     reg['bridges.%s.addr_%s' % (uid, bridge.type_in )] = str(bridge.addr_in)
     reg['bridges.%s.addr_%s' % (uid, bridge.type_out)] = str(bridge.addr_out)
+
     reg.close()
 
     bridge.start()
 
-    # bridge runs - send heartbeats so that cmgr knows about it
-    hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub,
-                              log=log, prof=prof)
+    # bridge runs - send heartbeats so that session knows about it
+    hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub, log=log, prof=prof)
 
     def hb_beat_cb():
         hb_pub.put('heartbeat', msg={'uid': uid})
@@ -131,13 +132,13 @@ def wrapped_main(sid, reg_addr, uid, log, prof):
         log=log)
     hb.start()
 
-    # register cmgr heartbeat by beating once
-    hb.beat(uid=b_cfg.cmgr)
+    # register session heartbeat by beating once
+    hb.beat(uid=sid)
 
-    # react on cmgr heartbeats
+    # react on session heartbeats
     def hb_sub_cb(topic, msg):
-        if msg['uid'] == b_cfg.cmgr:
-            hb.beat(uid=b_cfg.cmgr)
+        if msg['uid'] == sid:
+            hb.beat(uid=sid)
 
     ru.zmq.Subscriber('heartbeat', hb_cfg.addr_sub,
                       topic='heartbeat', cb=hb_sub_cb,
diff --git a/bin/radical-pilot-component b/bin/radical-pilot-component
index 77d324b586..fe63098254 100755
--- a/bin/radical-pilot-component
+++ b/bin/radical-pilot-component
@@ -14,8 +14,6 @@ import setproctitle as spt
 import radical.utils as ru
 import radical.pilot as rp
 
-dh = ru.DebugHelper()
-
 
 #
------------------------------------------------------------------------------ # @@ -72,25 +70,26 @@ def main(sid, reg_addr, uid): # def wrapped_main(sid, reg_addr, uid, log, prof): + spt.setproctitle('rp.%s' % uid) + term = mt.Event() reg = ru.zmq.RegistryClient(url=reg_addr) hb_cfg = ru.TypedDict(reg['heartbeat']) c_cfg = ru.TypedDict(reg['components.%s.cfg' % uid]) - spt.setproctitle('rp.%s' % uid) + reg.close() # start a non-primary session session = rp.Session(uid=sid, cfg=c_cfg, _role=rp.Session._DEFAULT, _reg_addr=reg_addr) - # create the component and begin to work + # create the instance and begin to work comp = rp.utils.Component.create(c_cfg, session) comp.start() - # component runs - send heartbeats so that cmgr knows about it - hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub) - # log=session._log, prof=session._prof) + # component runs - send heartbeats so that session knows about it + hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub, log=log, prof=prof) def hb_beat_cb(): hb_pub.put('heartbeat', msg={'uid': uid}) @@ -108,13 +107,13 @@ def wrapped_main(sid, reg_addr, uid, log, prof): log=log) hb.start() - # register cmgr heartbeat by beating once - hb.beat(uid=c_cfg.cmgr) + # register session heartbeat by beating once + hb.beat(uid=sid) - # record cmgr heartbeats + # react on session heartbeats def hb_sub_cb(topic, msg): - if msg['uid'] == c_cfg.cmgr: - hb.beat(uid=c_cfg.cmgr) + if msg['uid'] == sid: + hb.beat(uid=sid) ru.zmq.Subscriber('heartbeat', hb_cfg.addr_sub, topic='heartbeat', cb=hb_sub_cb, diff --git a/bin/radical-pilot-prte2prof b/bin/radical-pilot-prte2prof index f8cbbde3fc..9abec0883c 100755 --- a/bin/radical-pilot-prte2prof +++ b/bin/radical-pilot-prte2prof @@ -83,7 +83,7 @@ def handle_line(prof, line, pid, idmap): # print elems[2:] # ... DEBUG : prte output: [batch3:80017] [[33357,0],0] [1565343424.463101] ACTIVATE JOB [33357,0] STATE PENDING ALLOCATION AT ../../../../../../../source/prrte-dev/orte/tools/prte/prte.c:497 # ['... 
DEBUG : prte output:', 'batch3:80017', '33357,0', ',0', '1565343424.463101', 'ACTIVATE JOB', '33357,0', 'STATE PENDING ALLOCATION', '../../../../../../../source/prrte-dev/orte/tools/prte/prte.c:497'] - # _ '2019-08-09 05:37:34,815: agent.0 : MainProcess : DVMWatcher : DEBUG : prte output:', + # _ '2019-08-09 05:37:34,815: agent_0 : MainProcess : DVMWatcher : DEBUG : prte output:', # node 'batch3:80017', # dvm '33357,0', # dvmd ',0' diff --git a/setup.py b/setup.py index 6ac59bf830..78e1906742 100755 --- a/setup.py +++ b/setup.py @@ -242,7 +242,8 @@ def run(self): 'packages' : find_namespace_packages('src', include=['radical.*']), 'package_dir' : {'': 'src'}, 'scripts' : [ - 'bin/radical-pilot-agent', + 'bin/radical-pilot-agent_0', + 'bin/radical-pilot-agent_n', # 'bin/radical-pilot-agent-bridge', 'bin/radical-pilot-agent-funcs', 'bin/radical-pilot-agent-statepush', diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 66388ecf78..47fdde9bd2 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -40,18 +40,22 @@ class Agent_0(rpu.Worker): # -------------------------------------------------------------------------- # - def __init__(self, cfg: ru.Config): + def __init__(self): - self._uid = 'agent.0' + cfg = ru.Config(path='./agent_0.cfg') + + self._uid = cfg.uid self._pid = cfg.pid self._sid = cfg.sid + self._owner = cfg.owner self._pmgr = cfg.pmgr self._pwd = cfg.pilot_sandbox self._session = Session(uid=cfg.sid, cfg=cfg, _role=Session._AGENT_0) - self._cfg = self._session._cfg self._rcfg = self._session._rcfg - self._log = ru.Logger(self._uid, ns='radical.pilot') + + # init the worker / component base classes, connects registry + rpu.Worker.__init__(self, cfg, self._session) self._starttime = time.time() self._final_cause = None @@ -62,12 +66,8 @@ def __init__(self, cfg: ru.Config): self._services_setup = mt.Event() # this is the earliest point to sync bootstrap and agent profiles - self._prof = ru.Profiler(ns='radical.pilot', name=self._uid) self._prof.prof('hostname', uid=cfg.pid, msg=ru.get_hostname()) - # init the worker / component base classes, connects registry - rpu.Worker.__init__(self, cfg, self._session) - # configure ResourceManager before component startup, as components need # ResourceManager information for function (scheduler, executor) self._configure_rm() @@ -82,6 +82,17 @@ def __init__(self, cfg: ru.Config): self._write_sa_configs() self._start_sub_agents() # TODO: move to cmgr? + # regularly check for lifetime limit + self.register_timed_cb(self._check_lifetime, timer=10) + + # all set up - connect to proxy to fetch / push tasks + self._connect_proxy() + + + # -------------------------------------------------------------------------- + # + def _connect_proxy(self): + # at this point the session is up and connected, and it should have # brought up all communication bridges and components. We are # ready to rumble! 
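The hunks above move `Agent_0` toward self-configuration: the launching
component stages the agent config into the pilot sandbox, and the agent
re-reads it at startup.  A minimal sketch of that roundtrip, using only
`ru.write_json` and `ru.Config(path=...)` as they appear elsewhere in this
series; the uid/sid/pid values below are placeholders, not the real ids:

    import radical.utils as ru

    # pmgr side (cf. pmgr/launching/base.py below): serialize the config dict
    agent_cfg = {'uid': 'agent_0',
                 'sid': 'rp.session.0000',
                 'pid': 'pilot.0000'}
    ru.write_json(agent_cfg, './agent_0.cfg')   # staged into the pilot sandbox

    # agent side (cf. the `__init__` hunk above): re-read the staged config
    cfg = ru.Config(path='./agent_0.cfg')
    assert cfg.uid == 'agent_0'
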
@@ -96,17 +107,6 @@ def __init__(self, cfg: ru.Config): self.register_publisher(rpc.PROXY_STATE_PUBSUB) self.register_subscriber(rpc.STATE_PUBSUB, self._proxy_state_cb) - # regularly check for lifetime limit - self.register_timed_cb(self._check_lifetime, timer=10) - - # all set up - connect to proxy to fetch / push tasks - self._connect_proxy() - - - # -------------------------------------------------------------------------- - # - def _connect_proxy(self): - # # write config files for proxy channels # for p in self._cfg.proxy: # ru.write_json('%s.cfg' % p, self._cfg.proxy[p]) @@ -314,11 +314,11 @@ def finalize(self): out, err, log = '', '', '' - try : out = open('./agent.0.out', 'r').read(1024) + try : out = open('./agent_0.out', 'r').read(1024) except: pass - try : err = open('./agent.0.err', 'r').read(1024) + try : err = open('./agent_0.err', 'r').read(1024) except: pass - try : log = open('./agent.0.log', 'r').read(1024) + try : log = open('./agent_0.log', 'r').read(1024) except: pass if self._final_cause == 'timeout' : state = rps.DONE @@ -355,10 +355,10 @@ def _write_sa_configs(self): # we have all information needed by the subagents -- write the # sub-agent config files. - # write deep-copies of the config for each sub-agent (sans from agent.0) + # write deep-copies of the config for each sub-agent (sans from agent_0) for sa in self._rcfg.get('agents', {}): - assert (sa != 'agent.0'), 'expect subagent, not agent.0' + assert (sa != 'agent_0'), 'expect subagent, not agent_0' # use our own config sans agents/components/bridges as a basis for # the sub-agent config. @@ -372,14 +372,14 @@ def _write_sa_configs(self): tmp_cfg['uid'] = sa tmp_cfg['aid'] = sa - tmp_cfg['owner'] = 'agent.0' + tmp_cfg['owner'] = 'agent_0' # -------------------------------------------------------------------------- # def _start_services(self): - service_descriptions = self._rcfg.services + service_descriptions = self._cfg.services if not service_descriptions: return self._log.info('starting agent services') @@ -395,7 +395,6 @@ def _start_services(self): cfg = self._cfg tid = ru.generate_id('service.%(item_counter)04d', ru.ID_CUSTOM, ns=self._cfg.sid) - task = dict() task['origin'] = 'agent' task['description'] = td.as_dict() @@ -442,7 +441,6 @@ def _service_state_cb(self, topic, msg): # pylint: disable=unused-argument self._log.debug('service state update %s: %s', service['uid'], service['state']) - if service['state'] != rps.AGENT_EXECUTING: continue diff --git a/src/radical/pilot/agent/agent_n.py b/src/radical/pilot/agent/agent_n.py index caf41b0d24..ba05f1c229 100644 --- a/src/radical/pilot/agent/agent_n.py +++ b/src/radical/pilot/agent/agent_n.py @@ -20,7 +20,7 @@ class Agent_n(rpu.Worker): # -------------------------------------------------------------------------- # - def __init__(self, cfg): + def __init__(self, cfg: ru.Config, session): self._cfg = cfg self._sid = cfg.sid @@ -30,7 +30,7 @@ def __init__(self, cfg): self._sid = cfg.sid self._reg_addr = cfg.reg_addr - self._session = Session(uid=cfg.sid, cfg=cfg, _role=Session._AGENT_N) + self._session = session # log / profile via session until component manager is initialized self._log = self._session._log diff --git a/src/radical/pilot/agent/bootstrap_0.sh b/src/radical/pilot/agent/bootstrap_0.sh index f566adeedc..b2114864cd 100755 --- a/src/radical/pilot/agent/bootstrap_0.sh +++ b/src/radical/pilot/agent/bootstrap_0.sh @@ -90,7 +90,7 @@ SDISTS= RUNTIME= VIRTENV= VIRTENV_MODE= -CCM= +LAUNCHER= PILOT_ID= RP_VERSION= PYTHON= @@ -1448,16 +1448,6 @@ $cmd" 
} -# ------------------------------------------------------------------------------- -# -# Build the PREBOOTSTRAP2 variable to pass down to sub-agents -# -add_services() -{ - echo "$* &" >> ./services -} - - # ------------------------------------------------------------------------------- # # untar the pilot sandbox @@ -1500,17 +1490,16 @@ untar() # # NOTE: -z makes some assumptions on sandbox and tarball location # -while getopts "a:b:cd:e:f:h:i:j:m:p:r:s:t:v:w:x:y:z:" OPTION; do +while getopts "a:b:cd:e:f:h:i:m:p:r:s:t:v:w:x:y:z:" OPTION; do case $OPTION in a) SESSION_SANDBOX="$OPTARG" ;; b) PYTHON_DIST="$OPTARG" ;; - c) CCM='TRUE' ;; + c) LAUNCHER='ccmrun' ;; d) SDISTS="$OPTARG" ;; e) pre_bootstrap_0 "$OPTARG" ;; f) FORWARD_TUNNEL_ENDPOINT="$OPTARG" ;; h) HOSTPORT="$OPTARG" ;; i) PYTHON="$OPTARG" ;; - j) add_services "$OPTARG" ;; m) VIRTENV_MODE="$OPTARG" ;; p) PILOT_ID="$OPTARG" ;; r) RP_VERSION="$OPTARG" ;; @@ -1566,18 +1555,6 @@ touch "$LOGFILES_TARBALL" touch "$PROFILES_TARBALL" -# At this point, all pre_bootstrap_0 commands have been executed. We copy the -# resulting PATH and LD_LIBRARY_PATH, and apply that in bootstrap_2.sh, so that -# the sub-agents start off with the same env (or at least the relevant parts of -# it). -# -# This assumes that the env is actually transferable. If that assumption -# breaks at some point, we'll have to either only transfer the incremental env -# changes, or reconsider the approach to pre_bootstrap_x commands altogether -- -# see comment in the pre_bootstrap_0 function. -PB1_PATH="$PATH" -PB1_LDLB="$LD_LIBRARY_PATH" - # FIXME: By now the pre_process rules are already performed. # We should split the parsing and the execution of those. # "bootstrap start" is here so that $PILOT_ID is known. @@ -1728,24 +1705,6 @@ create_deactivate # ------------------------------------------------------------------------------ # launch the radical agent # -# the actual agent script lives in PWD if it was staged -- otherwise we use it -# from the virtenv -# NOTE: For some reasons, I have seen installations where 'scripts' go into -# bin/, and some where setuptools only changes them in place. For now, -# we allow for both -- but eventually (once the agent itself is small), -# we may want to move it to bin ourself. At that point, we probably -# have re-implemented pip... :/ -# FIXME: the second option should use $RP_MOD_PATH, or should derive the path -# from the imported rp modules __file__. -PILOT_SCRIPT=`which radical-pilot-agent` - -if test -z "$PILOT_SCRIPT" -then - echo "ERROR: rp installation incomplete?" - env_dump > env.rp.error - exit 1 -fi - # after all is said and done, we should end up with a usable python version. # Verify it @@ -1755,15 +1714,12 @@ verify_install # is independent of its location in the pilot VE test -z $(which radical-gtod) || cp $(which radical-gtod) ./gtod -AGENT_CMD="$PYTHON $PILOT_SCRIPT" - verify_rp_install # TODO: (re)move this output? echo echo "# -------------------------------------------------------------------" echo "# Launching radical-pilot-agent " -echo "# CMDLINE: $AGENT_CMD" # At this point we expand the variables in $PREBOOTSTRAP2 to pick up the # changes made by the environment by pre_bootstrap_0. 
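The getopts string above is one half of a contract; the other half is the
`bs_args` assembly in `pmgr/launching/base.py` (further below, which emits
`-e` per pre_bootstrap_0 command and `-w` per pre_bootstrap_1 command).  A
hedged sketch of a matching client-side argument list, restricted to flags
that appear in the getopts string; all values are placeholders:

    # assemble bootstrap_0.sh arguments (placeholder values)
    bs_args = ['-p', 'pilot.0000',        # PILOT_ID
               '-m', 'update',            # VIRTENV_MODE
               '-r', 'local',             # RP_VERSION
               '-b', 'default']           # PYTHON_DIST
    for arg in ['module load python']:    # pre_bootstrap_0 commands
        bs_args.extend(['-e', arg])
    cmd = './bootstrap_0.sh %s' % ' '.join('"%s"' % a for a in bs_args)
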
@@ -1809,75 +1765,40 @@ else BS_SHELL='/bin/sh' fi -cat > bootstrap_2.sh < bootstrap_2.sh <> services.out 2>> services.err -else - # start a sub-agent - exec $AGENT_CMD "\$1" 1>>"\$1.out" 2>>"\$1.err" -fi +# start (sub) agent +exec radical-pilot-agent_n "\$sid" "\$reg_addr" "\$uid" \\ + 1>>"bootstrap_2.\$uid.out" \\ + 2>>"bootstrap_2.\$uid.err" EOT chmod 0755 bootstrap_2.sh # ------------------------------------------------------------------------------ -# add a `wait` to the services script -test -f ./services && echo 'wait' >> ./services -test -f ./services && chmod 0755 ./services - -# start the master agent instance (zero) +# start the master agent instance (agent_0) in the bs0 environment profile_event 'bootstrap_0_ok' -if test -z "$CCM"; then - ./bootstrap_2.sh 'agent.0' \ - 1>> agent.0.bootstrap_2.out \ - 2>> agent.0.bootstrap_2.err & -else - ccmrun ./bootstrap_2.sh 'agent.0' \ - 1>> agent.0.bootstrap_2.out \ - 2>> agent.0.bootstrap_2.err & -fi + +$LAUNCHER radical-pilot-agent_0 1>>agent_0.out 2>>agent_0.err & + AGENT_PID=$! pilot_state="PMGR_ACTIVE" @@ -1929,7 +1850,7 @@ echo "# CLEANUP: $CLEANUP" echo "#" profile_event 'cleanup_start' -contains $CLEANUP 'l' && rm -r "$PILOT_SANDBOX/agent.*" +contains $CLEANUP 'l' && rm -r "$PILOT_SANDBOX/agent_*" contains $CLEANUP 'u' && rm -r "$PILOT_SANDBOX/task.*" contains $CLEANUP 'v' && rm -r "$VIRTENV/" # FIXME: in what cases? contains $CLEANUP 'e' && rm -r "$PILOT_SANDBOX/" diff --git a/src/radical/pilot/agent/launch_method/mpirun.py b/src/radical/pilot/agent/launch_method/mpirun.py index e06caa3230..cad1d6c88c 100644 --- a/src/radical/pilot/agent/launch_method/mpirun.py +++ b/src/radical/pilot/agent/launch_method/mpirun.py @@ -35,7 +35,7 @@ def _init_from_scratch(self, env, env_sh): components (including Raptor and other task overlays) can use them to launch tasks. - The first use (likely in `agent.0`) will call this initializer to + The first use (likely in `agent_0`) will call this initializer to inspect LM properties. Later uses will be able to use the information gathered and should re-initialize via `_init_from_info()`, using the info dict returned here. diff --git a/src/radical/pilot/configs/agent_debug_sa.json b/src/radical/pilot/configs/agent_debug_sa.json index 1a7e5954f6..e74a62fd50 100644 --- a/src/radical/pilot/configs/agent_debug_sa.json +++ b/src/radical/pilot/configs/agent_debug_sa.json @@ -71,7 +71,7 @@ # }, # # "agents": { - # "agent.1": { + # "agent_1": { # "target": "node", # "components": { # "agent_staging_input" : {"count" : 1}, @@ -79,7 +79,7 @@ # "agent_staging_output" : {"count" : 1} # } # }, - # "agent.2": { + # "agent_2": { # "target": "node", # "components": { # "agent_staging_input" : {"count" : 1}, diff --git a/src/radical/pilot/configs/agent_default.json b/src/radical/pilot/configs/agent_default.json index 9a785420b3..2c4e67dde5 100644 --- a/src/radical/pilot/configs/agent_default.json +++ b/src/radical/pilot/configs/agent_default.json @@ -4,7 +4,7 @@ # a functional pilot agent, without any component redundency. 
{ - # agent.0 must always have target 'local' at this point + # agent_0 must always have target 'local' at this point # mode 'shared' : local node is also used for CUs # mode 'reserved' : local node is reserved for the agent # FIXME: mode is unused diff --git a/src/radical/pilot/configs/agent_default_sa.json b/src/radical/pilot/configs/agent_default_sa.json index 5f3e2f2c49..9d4fa39dfd 100644 --- a/src/radical/pilot/configs/agent_default_sa.json +++ b/src/radical/pilot/configs/agent_default_sa.json @@ -4,7 +4,7 @@ # a functional pilot agent, without any component redundency. { - # agent.0 must always have target 'local' at this point + # agent_0 must always have target 'local' at this point # mode 'shared' : local node is also used for CUs # mode 'reserved' : local node is reserved for the agent # FIXME: mode is unused diff --git a/src/radical/pilot/configs/agent_rhea.json b/src/radical/pilot/configs/agent_rhea.json index cc6991cc5d..ce20ed5225 100644 --- a/src/radical/pilot/configs/agent_rhea.json +++ b/src/radical/pilot/configs/agent_rhea.json @@ -32,31 +32,31 @@ }, "agents": { - "agent.1": { + "agent_1": { "target": "node", "components": { "AgentStagingInputComponent" : {"count" : 1} } }, - "agent.2": { + "agent_2": { "target": "node", "components": { "AgentSchedulingComponent" : {"count" : 1} } }, - "agent.3": { + "agent_3": { "target": "node", "components": { "AgentSchedulingComponent" : {"count" : 1} } }, - "agent.4": { + "agent_4": { "target": "node", "components": { "AgentStagingOutputComponent" : {"count" : 1} } }, - "agent.5": { + "agent_5": { "target": "node", "components": { "UpdateWorker" : {"count" : 1} diff --git a/src/radical/pilot/configs/agent_summit_sa.json b/src/radical/pilot/configs/agent_summit_sa.json index e3b3ed94f5..3e72b82a6d 100644 --- a/src/radical/pilot/configs/agent_summit_sa.json +++ b/src/radical/pilot/configs/agent_summit_sa.json @@ -43,7 +43,7 @@ }, "agents": { - "agent.1": { + "agent_1": { "target": "node", "components": { "AgentStagingInputComponent" : {"count" : 1}, @@ -51,7 +51,7 @@ "AgentExecutingComponent" : {"count" : 1} } }, - "agent.2": { + "agent_2": { "target": "node", "components": { "AgentStagingInputComponent" : {"count" : 1}, diff --git a/src/radical/pilot/pmgr/launching/base.py b/src/radical/pilot/pmgr/launching/base.py index 607a4f21d7..ef7a4b0bd0 100644 --- a/src/radical/pilot/pmgr/launching/base.py +++ b/src/radical/pilot/pmgr/launching/base.py @@ -856,7 +856,11 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): for arg in pre_bootstrap_0: bs_args.extend(['-e', arg]) for arg in pre_bootstrap_1: bs_args.extend(['-w', arg]) + agent_cfg['uid'] = 'agent_0' + agent_cfg['sid'] = sid + agent_cfg['pid'] = pid agent_cfg['owner'] = pid + agent_cfg['pmgr'] = self._pmgr agent_cfg['resource'] = resource agent_cfg['nodes'] = requested_nodes agent_cfg['cores'] = allocated_cores @@ -866,10 +870,6 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): agent_cfg['runtime'] = runtime agent_cfg['app_comm'] = app_comm agent_cfg['proxy_url'] = agent_proxy_url - agent_cfg['sid'] = sid - agent_cfg['pid'] = pid - agent_cfg['pmgr'] = self._pmgr - agent_cfg['logdir'] = '.' 
agent_cfg['pilot_sandbox'] = pilot_sandbox agent_cfg['session_sandbox'] = session_sandbox agent_cfg['resource_sandbox'] = resource_sandbox @@ -890,13 +890,12 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): pilot['cfg'] = agent_cfg pilot['resources'] = {'cpu': allocated_cores, 'gpu': allocated_gpus} - pilot['$set'] = ['resources'] # ---------------------------------------------------------------------- # Write agent config dict to a json file in pilot sandbox. - agent_cfg_name = 'agent.0.cfg' + agent_cfg_name = 'agent_0.cfg' cfg_tmp_handle, cfg_tmp_file = tempfile.mkstemp(prefix='rp.agent_cfg.') os.close(cfg_tmp_handle) # file exists now diff --git a/src/radical/pilot/pmgr/launching/default.py b/src/radical/pilot/pmgr/launching/default.py deleted file mode 100644 index c4327b2233..0000000000 --- a/src/radical/pilot/pmgr/launching/default.py +++ /dev/null @@ -1,1256 +0,0 @@ -# pylint: disable=protected-access - -__copyright__ = "Copyright 2013-2016, http://radical.rutgers.edu" -__license__ = "MIT" - - -import os -import copy -import math -import time -import pprint -import shutil -import tempfile -import threading as mt - -import radical.gtod as rg -import radical.utils as ru -import radical.saga as rs - -from ... import states as rps -from ... import constants as rpc -from ... import utils as rpu - -from .base import PMGRLaunchingComponent - - -from ...staging_directives import complete_url, expand_staging_directives - - -# ------------------------------------------------------------------------------ -# local constants -DEFAULT_AGENT_SPAWNER = 'POPEN' -DEFAULT_RP_VERSION = 'local' -DEFAULT_VIRTENV_MODE = 'update' -DEFAULT_VIRTENV_DIST = 'default' -DEFAULT_AGENT_CONFIG = 'default' - -JOB_CANCEL_DELAY = 120 # seconds between cancel signal and job kill -JOB_CHECK_INTERVAL = 60 # seconds between runs of the job state check loop -JOB_CHECK_MAX_MISSES = 3 # number of times to find a job missing before - # declaring it dead - -LOCAL_SCHEME = 'file' - - -# ------------------------------------------------------------------------------ -# -class Default(PMGRLaunchingComponent): - - # -------------------------------------------------------------------------- - # - def __init__(self, cfg, session): - - PMGRLaunchingComponent.__init__(self, cfg, session) - - - # -------------------------------------------------------------------------- - # - def initialize(self): - - # we don't really have an output queue, as we pass control over the - # pilot jobs to the resource management system (ResourceManager). 
- - self._pilots = dict() # dict for all known pilots - self._pilots_lock = mt.RLock() # lock on maipulating the above - self._checking = list() # pilots to check state on - self._check_lock = mt.RLock() # lock on maipulating the above - self._saga_js_cache = dict() # cache of saga job services - self._sandboxes = dict() # cache of resource sandbox URLs - self._cache_lock = mt.RLock() # lock for cache - - self._mod_dir = os.path.dirname(os.path.abspath(__file__)) - self._root_dir = "%s/../../" % self._mod_dir - - self.register_input(rps.PMGR_LAUNCHING_PENDING, - rpc.PMGR_LAUNCHING_QUEUE, self.work) - - self._stager_queue = self.get_output_ep(rpc.STAGER_REQUEST_QUEUE) - - # FIXME: make interval configurable - self.register_timed_cb(self._pilot_watcher_cb, timer=10.0) - - # we listen for pilot cancel commands - self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb) - - # also listen for completed staging directives - self.register_subscriber(rpc.STAGER_RESPONSE_PUBSUB, self._staging_ack_cb) - self._active_sds = dict() - self._sds_lock = mt.Lock() - - - self._log.info(ru.get_version([self._mod_dir, self._root_dir])) - self._rp_version, _, _, _, self._rp_sdist_name, self._rp_sdist_path = \ - ru.get_version([self._mod_dir, self._root_dir]) - - - # -------------------------------------------------------------------------- - # - def finalize(self): - - try: - self.unregister_timed_cb(self._pilot_watcher_cb) - self.unregister_input(rps.PMGR_LAUNCHING_PENDING, - rpc.PMGR_LAUNCHING_QUEUE, self.work) - - # FIXME: always kill all saga jobs for non-final pilots at termination, - # and set the pilot states to CANCELED. This will conflict with - # disconnect/reconnect semantics. - with self._pilots_lock: - pids = list(self._pilots.keys()) - - self._cancel_pilots(pids) - self._kill_pilots(pids) - - with self._cache_lock: - for url,js in self._saga_js_cache.items(): - self._log.debug('close js %s', url) - js.close() - - except: - self._log.exception('finalization error') - - - # -------------------------------------------------------------------------- - # - def _control_cb(self, topic, msg): - - cmd = msg['cmd'] - arg = msg['arg'] - - self._log.debug('launcher got %s', msg) - - if cmd == 'cancel_pilots': - - # on cancel_pilot requests, we forward the DB entries via MongoDB, - # by pushing a pilot update. We also mark the pilot for - # cancelation, so that the pilot watcher can cancel the job after - # JOB_CANCEL_DELAY seconds, in case the pilot did not react on the - # command in time. - - pmgr = arg['pmgr'] - pids = arg['uids'] - - if pmgr != self._pmgr: - # this request is not for us to enact - return True - - if not isinstance(pids, list): - pids = [pids] - - self._log.info('received pilot_cancel command (%s)', pids) - - self._cancel_pilots(pids) - - - return True - - - # -------------------------------------------------------------------------- - # - def _pilot_watcher_cb(self): - - # FIXME: we should actually use SAGA job state notifications! - # FIXME: check how race conditions are handles: we may detect - # a finalized SAGA job and change the pilot state -- but that - # pilot may have transitioned into final state via the normal - # notification mechanism already. That probably should be sorted - # out by the pilot manager, which will receive notifications for - # both transitions. As long as the final state is the same, - # there should be no problem anyway. 
If it differs, the - # 'cleaner' final state should prevail, in this ordering: - # cancel - # timeout - # error - # disappeared - # This implies that we want to communicate 'final_cause' - - # we don't want to lock our members all the time. For that reason we - # use a copy of the pilots_tocheck list and iterate over that, and only - # lock other members when they are manipulated. - tc = rs.job.Container() - with self._pilots_lock, self._check_lock: - - for pid in self._checking: - tc.add(self._pilots[pid]['job']) - - states = tc.get_states() - - self._log.debug('bulk states: %s', states) - - # if none of the states is final, we have nothing to do. - # We can't rely on the ordering of tasks and states in the task - # container, so we hope that the task container's bulk state query lead - # to a caching of state information, and we thus have cache hits when - # querying the pilots individually - - final_pilots = list() - with self._pilots_lock, self._check_lock: - - for pid in self._checking: - - state = self._pilots[pid]['job'].state - self._log.debug('saga job state: %s %s %s', pid, self. - _pilots[pid]['job'], state) - - if state in [rs.job.DONE, rs.job.FAILED, rs.job.CANCELED]: - pilot = self._pilots[pid]['pilot'] - if state == rs.job.DONE : pilot['state'] = rps.DONE - if state == rs.job.FAILED : pilot['state'] = rps.FAILED - if state == rs.job.CANCELED: pilot['state'] = rps.CANCELED - final_pilots.append(pilot) - - if final_pilots: - - for pilot in final_pilots: - - with self._check_lock: - # stop monitoring this pilot - self._checking.remove(pilot['uid']) - - self._log.debug('final pilot %s %s', pilot['uid'], pilot['state']) - - self.advance(final_pilots, push=False, publish=True) - - # all checks are done, final pilots are weeded out. Now check if any - # pilot is scheduled for cancellation and is overdue, and kill it - # forcefully. - to_cancel = list() - with self._pilots_lock: - - for pid in self._pilots: - - pilot = self._pilots[pid]['pilot'] - time_cr = pilot.get('cancel_requested') - - # check if the pilot is final meanwhile - if pilot['state'] in rps.FINAL: - continue - - if time_cr and time_cr + JOB_CANCEL_DELAY < time.time(): - self._log.debug('pilot needs killing: %s : %s + %s < %s', - pid, time_cr, JOB_CANCEL_DELAY, time.time()) - del(pilot['cancel_requested']) - self._log.debug(' cancel pilot %s', pid) - to_cancel.append(pid) - - if to_cancel: - self._kill_pilots(to_cancel) - - return True - - - # -------------------------------------------------------------------------- - # - def _cancel_pilots(self, pids): - ''' - Send a cancellation request to the pilots. This call will not wait for - the request to get enacted, nor for it to arrive, but just send it. - ''' - - if not pids or not self._pilots: - # nothing to do - return - - # recod time of request, so that forceful termination can happen - # after a certain delay - now = time.time() - with self._pilots_lock: - for pid in pids: - if pid in self._pilots: - self._log.debug('update cancel req: %s %s', pid, now) - self._pilots[pid]['pilot']['cancel_requested'] = now - - - # -------------------------------------------------------------------------- - # - def _kill_pilots(self, pids): - ''' - Forcefully kill a set of pilots. For pilots which have just recently be - cancelled, we will wait a certain amount of time to give them a chance - to termimate on their own (which allows to flush profiles and logfiles, - etc). After that delay, we'll make sure they get killed. 
- ''' - - self._log.debug('killing pilots: %s', pids) - - if not pids or not self._pilots: - # nothing to do - return - - # find the most recent cancellation request - with self._pilots_lock: - self._log.debug('killing pilots: %s', - [p['pilot'].get('cancel_requested', 0) - for p in list(self._pilots.values())]) - last_cancel = max([p['pilot'].get('cancel_requested', 0) - for p in list(self._pilots.values())]) - - self._log.debug('killing pilots: last cancel: %s', last_cancel) - - # we wait for up to JOB_CANCEL_DELAY for a pilt - while time.time() < (last_cancel + JOB_CANCEL_DELAY): - - self._log.debug('killing pilots: check %s < %s + %s', - time.time(), last_cancel, JOB_CANCEL_DELAY) - - alive_pids = list() - for pid in pids: - - if pid not in self._pilots: - self._log.error('unknown: %s', pid) - raise ValueError('unknown pilot %s' % pid) - - pilot = self._pilots[pid]['pilot'] - if pilot['state'] not in rps.FINAL: - self._log.debug('killing pilots: alive %s', pid) - alive_pids.append(pid) - else: - self._log.debug('killing pilots: dead %s', pid) - - pids = alive_pids - if not alive_pids: - # nothing to do anymore - return - - # avoid busy poll) - time.sleep(1) - - to_advance = list() - - # we don't want the watcher checking for these pilot anymore - with self._check_lock: - for pid in pids: - if pid in self._checking: - self._checking.remove(pid) - - - self._log.debug('killing pilots: kill! %s', pids) - try: - with self._pilots_lock: - tc = rs.job.Container() - for pid in pids: - - if pid not in self._pilots: - self._log.error('unknown: %s', pid) - raise ValueError('unknown pilot %s' % pid) - - pilot = self._pilots[pid]['pilot'] - job = self._pilots[pid]['job'] - - # don't overwrite resource_details from the agent - if 'resource_details' in pilot: - del(pilot['resource_details']) - - if pilot['state'] in rps.FINAL: - continue - - self._log.debug('plan cancellation of %s : %s', pilot, job) - to_advance.append(pilot) - self._log.debug('request cancel for %s', pilot['uid']) - tc.add(job) - - self._log.debug('cancellation start') - tc.cancel() - tc.wait() - self._log.debug('cancellation done') - - # set canceled state - self.advance(to_advance, state=rps.CANCELED, push=False, publish=True) - - except Exception: - self._log.exception('pilot kill failed') - - return True - - - # -------------------------------------------------------------------------- - # - def work(self, pilots): - - if not isinstance(pilots, list): - pilots = [pilots] - - self.advance(pilots, rps.PMGR_LAUNCHING, publish=True, push=False) - - # We can only use bulk submission for pilots which go to the same - # target, thus we sort them into buckets and lunch the buckets - # individually - buckets = dict() - for pilot in pilots: - resource = pilot['description']['resource'] - schema = pilot['description']['access_schema'] - if resource not in buckets: - buckets[resource] = dict() - if schema not in buckets[resource]: - buckets[resource][schema] = list() - buckets[resource][schema].append(pilot) - - for resource in buckets: - - for schema in buckets[resource]: - - try: - pilots = buckets[resource][schema] - pids = [p['uid'] for p in pilots] - self._log.info("Launching pilots on %s: %s", resource, pids) - - self._start_pilot_bulk(resource, schema, pilots) - - self.advance(pilots, rps.PMGR_ACTIVE_PENDING, - push=False, publish=True) - - except Exception: - self._log.exception('bulk launch failed') - self.advance(pilots, rps.FAILED, push=False, publish=True) - - - # 
-------------------------------------------------------------------------- - # - def _start_pilot_bulk(self, resource, schema, pilots): - ''' - For each pilot, we prepare by determining what files need to be staged, - and what job description needs to be submitted. Files are then be - staged, and jobs are submitted. - - Two files are staged: a bootstrapper and a tarball - the latter - containing the pilot sandboxes, agent configs, and any other auxilliary - files needed to bootstrap. The bootstrapper will untar those parts of - the tarball which it needs to bootstrap one specific pilot. - ''' - - rcfg = self._session.get_resource_config(resource, schema) - sid = self._session.uid - - # ---------------------------------------------------------------------- - # the rcfg can contain keys with string expansion placeholders where - # values from the pilot description need filling in. A prominent - # example is `%(pd.project)s`, where the pilot description's `PROJECT` - # value needs to be filled in (here in lowercase). - # - # FIXME: right now we assume all pilot descriptions to contain similar - # entries, so that the expansion is only done on the first PD. - expand = dict() - pd = pilots[0]['description'] - for k,v in pd.items(): - if v is None: - v = '' - expand['pd.%s' % k] = v - if isinstance(v, str): - expand['pd.%s' % k.upper()] = v.upper() - expand['pd.%s' % k.lower()] = v.lower() - else: - expand['pd.%s' % k.upper()] = v - expand['pd.%s' % k.lower()] = v - - for k in rcfg: - if isinstance(rcfg[k], str): - orig = rcfg[k] - rcfg[k] = rcfg[k] % expand - expanded = rcfg[k] - if orig != expanded: - self._log.debug('RCFG:\n%s\n%s', orig, expanded) - - # we create a fake session_sandbox with all pilot_sandboxes in /tmp, and - # then tar it up. Once we untar that tarball on the target machine, we - # should have all sandboxes and all files required to bootstrap the - # pilots - tmp_dir = os.path.abspath(tempfile.mkdtemp(prefix='rp_agent_tmp')) - tar_name = '%s.%s.tgz' % (sid, self._uid) - tar_tgt = '%s/%s' % (tmp_dir, tar_name) - tar_url = rs.Url('file://localhost/%s' % tar_tgt) - - # we need the session sandbox url, but that is (at least in principle) - # dependent on the schema to use for pilot startup. So we confirm here - # that the bulk is consistent wrt. to the schema. Also include - # `staging_input` files and place them in the `pilot_sandbox`. - # - # FIXME: if it is not, it needs to be splitted into schema-specific - # sub-bulks - # - schema = pd.get('access_schema') - for pilot in pilots[1:]: - assert(schema == pilot['description'].get('access_schema')), \ - 'inconsistent scheme on launch / staging' - - # get and expand sandboxes (this bulk uses the same schema toward the - # same target resource, so all session sandboxes are the same) - # FIXME: expansion actually may differ per pilot (queue names, project - # names, etc could be expanded) - session_sandbox = self._session._get_session_sandbox(pilots[0]).path - session_sandbox = session_sandbox % expand - - # we will create the session sandbox before we untar, so we can use that - # as workdir, and pack all paths relative to that session sandbox. That - # implies that we have to recheck that all URLs in fact do point into - # the session sandbox. - # - # We also create a file `staging_output.json` for each pilot which - # contains the list of files to be tarred up and prepared for output - # staging. 
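The `%(pd.<key>)s` placeholder expansion described at the top of this method is plain `%`-interpolation over a flattened mapping of the pilot description. A standalone sketch, assuming made-up description values:

    # build the expansion map as the code above does: one entry per key,
    # plus upper/lower-case variants for string values
    pd     = {'project': 'abc123', 'queue': 'batch'}
    expand = dict()
    for k, v in pd.items():
        expand['pd.%s' % k]         = v
        expand['pd.%s' % k.upper()] = v.upper()
        expand['pd.%s' % k.lower()] = v.lower()

    rcfg_entry = 'account_%(pd.PROJECT)s'            # hypothetical rcfg value
    print(rcfg_entry % expand)                       # account_ABC123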
- - ft_list = list() # files to stage - jd_list = list() # jobs to submit - - for pilot in pilots: - - pid = pilot['uid'] - os.makedirs('%s/%s' % (tmp_dir, pid)) - - info = self._prepare_pilot(resource, rcfg, pilot, expand, tar_name) - - ft_list += info['fts'] - jd_list.append(info['jd']) - - self._prof.prof('staging_in_start', uid=pid) - - for fname in ru.as_list(pilot['description'].get('input_staging')): - base = os.path.basename(fname) - ft_list.append({'src': fname, - 'tgt': '%s/%s' % (pid, base), - 'rem': False}) - - output_staging = pilot['description'].get('output_staging') - if output_staging: - fname = '%s/%s/staging_output.txt' % (tmp_dir, pilot['uid']) - with ru.ru_open(fname, 'w') as fout: - for entry in output_staging: - fout.write('%s\n' % entry) - - # direct staging, use first pilot for staging context - # NOTE: this implies that the SDS can only refer to session - # sandboxes, not to pilot sandboxes! - self._stage_in(pilots[0], info['sds']) - - for ft in ft_list: - src = os.path.abspath(ft['src']) - tgt = os.path.relpath(os.path.normpath(ft['tgt']), session_sandbox) - # src_dir = os.path.dirname(src) - tgt_dir = os.path.dirname(tgt) - - if tgt_dir.startswith('..'): - tgt = ft['tgt'] - tgt_dir = os.path.dirname(tgt) - - if not os.path.isdir('%s/%s' % (tmp_dir, tgt_dir)): - os.makedirs('%s/%s' % (tmp_dir, tgt_dir)) - - if src == '/dev/null': - # we want an empty file -- touch it (tar will refuse to - # handle a symlink to /dev/null) - ru.ru_open('%s/%s' % (tmp_dir, tgt), 'a').close() - else: - # use a shell callout to account for wildcard expansion - cmd = 'ln -s %s %s/%s' % (os.path.abspath(src), tmp_dir, tgt) - out, err, ret = ru.sh_callout(cmd, shell=True) - if ret: - self._log.debug('out: %s', out) - self._log.debug('err: %s', err) - raise RuntimeError('callout failed: %s' % cmd) - - - # tar. If any command fails, this will raise. - cmd = "cd %s && tar zchf %s *" % (tmp_dir, tar_tgt) - out, err, ret = ru.sh_callout(cmd, shell=True) - - if ret: - self._log.debug('cmd: %s', cmd) - self._log.debug('out: %s', out) - self._log.debug('err: %s', err) - raise RuntimeError('callout failed: %s' % cmd) - - # remove all files marked for removal-after-pack - for ft in ft_list: - if ft['rem']: - os.unlink(ft['src']) - - fs_endpoint = rcfg['filesystem_endpoint'] - fs_url = rs.Url(fs_endpoint) - tar_rem = rs.Url(fs_url) - tar_rem.path = "%s/%s" % (session_sandbox, tar_name) - - self._log.debug('stage tarball for %s', pilots[0]['uid']) - self._stage_in(pilots[0], {'source': tar_url, - 'target': tar_rem, - 'action': rpc.TRANSFER}) - shutil.rmtree(tmp_dir) - - # FIXME: the untar was moved into the bootstrapper (see `-z`). That - # is actually only correct for the single-pilot case... - - for pilot in pilots: - self._prof.prof('staging_in_stop', uid=pilot['uid']) - self._prof.prof('submission_start', uid=pilot['uid']) - - # look up or create JS for actual pilot submission. This might result - # in the same js url as above, or not. 
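The job-service lookup right below keeps one `rs.job.Service` handle per endpoint URL behind a lock. The cache pattern in isolation (hedged sketch: the factory callable stands in for the real service constructor):

    import threading

    _cache      = dict()
    _cache_lock = threading.RLock()

    def get_service(endpoint, factory):
        # create at most one service handle per endpoint URL
        with _cache_lock:
            if endpoint not in _cache:
                _cache[endpoint] = factory(endpoint)
            return _cache[endpoint]

    js = get_service('slurm://hpc.example.org/', lambda ep: object())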
- js_ep = rcfg['job_manager_endpoint'] - with self._cache_lock: - if js_ep in self._saga_js_cache: - js = self._saga_js_cache[js_ep] - else: - js = rs.job.Service(js_ep, session=self._session) - self._saga_js_cache[js_ep] = js - - # now that the scripts are in place and configured, - # we can launch the agent - jc = rs.job.Container() - - for jd in jd_list: - self._log.debug('jd: %s', pprint.pformat(jd.as_dict())) - jc.add(js.create_job(jd)) - - jc.run() - - # Order of tasks in `rs.job.Container().tasks` is not changing over the - # time, thus it's able to iterate over it and other list(s) all together - for j, pilot in zip(jc.get_tasks(), pilots): - - # do a quick error check - if j.state == rs.FAILED: - self._log.error('%s: %s : %s : %s', j.id, j.state, j.stderr, j.stdout) - raise RuntimeError("SAGA Job state is FAILED. (%s)" % j.name) - - pid = pilot['uid'] - - # Update the Pilot's state to 'PMGR_ACTIVE_PENDING' if SAGA job - # submission was successful. Since the pilot leaves the scope of - # the PMGR for the time being, we update the complete DB document - pilot['$all'] = True - - # FIXME: update the right pilot - with self._pilots_lock: - - self._pilots[pid] = dict() - self._pilots[pid]['pilot'] = pilot - self._pilots[pid]['job'] = j - - # make sure we watch that pilot - with self._check_lock: - self._checking.append(pid) - - for pilot in pilots: - self._prof.prof('submission_stop', uid=pilot['uid']) - - - # -------------------------------------------------------------------------- - # - def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): - - pid = pilot["uid"] - ret = {'fts': list(), # tar for staging - 'sds': list(), # direct staging - 'jd' : None } # job description - - # ---------------------------------------------------------------------- - # Database connection parameters - sid = self._session.uid - service_url = self._session.cfg.service_url - - # some default values are determined at runtime - default_virtenv = '%%(resource_sandbox)s/ve.%s.%s' % \ - (resource, self._rp_version) - - # ---------------------------------------------------------------------- - # pilot description and resource configuration - requested_nodes = pilot['description']['nodes'] - requested_cores = pilot['description']['cores'] - requested_gpus = pilot['description']['gpus'] - requested_memory = pilot['description']['memory'] - runtime = pilot['description']['runtime'] - app_comm = pilot['description']['app_comm'] - queue = pilot['description']['queue'] - job_name = pilot['description']['job_name'] - project = pilot['description']['project'] - cleanup = pilot['description']['cleanup'] - candidate_hosts = pilot['description']['candidate_hosts'] - services = pilot['description']['services'] - - # ---------------------------------------------------------------------- - # get parameters from resource cfg, set defaults where needed - agent_service_url = rcfg.get('agent_service_url', service_url) - agent_spawner = rcfg.get('agent_spawner', DEFAULT_AGENT_SPAWNER) - agent_config = rcfg.get('agent_config', DEFAULT_AGENT_CONFIG) - agent_scheduler = rcfg.get('agent_scheduler') - tunnel_bind_device = rcfg.get('tunnel_bind_device') - default_queue = rcfg.get('default_queue') - forward_tunnel_endpoint = rcfg.get('forward_tunnel_endpoint') - resource_manager = rcfg.get('resource_manager') - pre_bootstrap_0 = rcfg.get('pre_bootstrap_0', []) - pre_bootstrap_1 = rcfg.get('pre_bootstrap_1', []) - cores_per_node = rcfg.get('cores_per_node', 0) - gpus_per_node = rcfg.get('gpus_per_node', 0) - blocked_cores = 
rcfg.get('blocked_cores', []) - blocked_gpus = rcfg.get('blocked_gpus', []) - lfs_path_per_node = rcfg.get('lfs_path_per_node') - lfs_size_per_node = rcfg.get('lfs_size_per_node', 0) - python_interpreter = rcfg.get('python_interpreter') - python_dist = rcfg.get('python_dist') - virtenv_dist = rcfg.get('virtenv_dist', DEFAULT_VIRTENV_DIST) - virtenv_mode = rcfg.get('virtenv_mode', DEFAULT_VIRTENV_MODE) - virtenv = rcfg.get('virtenv', default_virtenv) - rp_version = rcfg.get('rp_version') - task_tmp = rcfg.get('task_tmp') - spmd_variation = rcfg.get('spmd_variation') - task_pre_launch = rcfg.get('task_pre_launch') - task_pre_exec = rcfg.get('task_pre_exec') - task_pre_rank = rcfg.get('task_pre_rank') - task_post_launch = rcfg.get('task_post_launch') - task_post_exec = rcfg.get('task_post_exec') - task_post_rank = rcfg.get('task_post_rank') - mandatory_args = rcfg.get('mandatory_args', []) - system_architecture = rcfg.get('system_architecture', {}) - saga_jd_supplement = rcfg.get('saga_jd_supplement', {}) - services += rcfg.get('services', []) - - self._log.debug(pprint.pformat(rcfg)) - - # make sure that mandatory args are known - for ma in mandatory_args: - if pilot['description'].get(ma) is None: - raise ValueError('attribute "%s" is required for "%s"' - % (ma, resource)) - - # get pilot and global sandbox - resource_sandbox = self._session._get_resource_sandbox(pilot) - session_sandbox = self._session._get_session_sandbox (pilot) - pilot_sandbox = self._session._get_pilot_sandbox (pilot) - client_sandbox = self._session._get_client_sandbox () - - pilot['resource_sandbox'] = str(resource_sandbox) % expand - pilot['session_sandbox'] = str(session_sandbox) % expand - pilot['pilot_sandbox'] = str(pilot_sandbox) % expand - pilot['client_sandbox'] = str(client_sandbox) - - # from here on we need only paths - resource_sandbox = resource_sandbox.path % expand - session_sandbox = session_sandbox .path % expand - pilot_sandbox = pilot_sandbox .path % expand - # client_sandbox = client_sandbox # not expanded - - if not job_name: - job_name = pid - - try: - if isinstance(agent_config, dict): - agent_cfg = ru.Config(cfg=agent_config) - - elif isinstance(agent_config, str): - agent_cfg = ru.Config('radical.pilot', - category='agent', - name=agent_config) - else: - # we can't handle this type - raise TypeError('agent config must be string or dict') - - except Exception: - self._log.exception('Error using agent config') - raise - - - # expand variables in virtenv string - virtenv = virtenv % {'pilot_sandbox' : pilot_sandbox, - 'session_sandbox' : session_sandbox, - 'resource_sandbox': resource_sandbox} - - # Check for deprecated global_virtenv - if 'global_virtenv' in rcfg: - raise RuntimeError("'global_virtenv' is deprecated (%s)" % resource) - - # Create a host:port string for use by the bootstrap_0. 
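The host:port derivation below falls back to MongoDB's default port when the service URL does not carry one. The same logic with only the standard library (the URL is a placeholder):

    from urllib.parse import urlparse

    u        = urlparse('mongodb://db.example.org/rp')   # placeholder URL
    hostport = '%s:%d' % (u.hostname, u.port or 27017)   # 27017: mongodb default
    print(hostport)                                      # db.example.org:27017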
- tmp = rs.Url(agent_service_url) - if tmp.port: - hostport = "%s:%d" % (tmp.host, tmp.port) - else: - hostport = "%s:%d" % (tmp.host, 27017) # mongodb default - - # ---------------------------------------------------------------------- - # the version of the agent is derived from - # rp_version, which has the following format - # and interpretation: - # - # case rp_version: - # @: - # @tag/@branch/@commit: # no sdist staging - # git clone $github_base radical.pilot.src - # (cd radical.pilot.src && git checkout token) - # pip install -t $VIRTENV/rp_install/ radical.pilot.src - # rm -rf radical.pilot.src - # export PYTHONPATH=$VIRTENV/rp_install:$PYTHONPATH - # - # release: # no sdist staging - # pip install -t $VIRTENV/rp_install radical.pilot - # export PYTHONPATH=$VIRTENV/rp_install:$PYTHONPATH - # - # local: # needs sdist staging - # tar zxf $sdist.tgz - # pip install -t $SANDBOX/rp_install $sdist/ - # export PYTHONPATH=$SANDBOX/rp_install:$PYTHONPATH - # - # installed: # no sdist staging - # true - # esac - # - # virtenv_mode - # private : error if ve exists, otherwise create, then use - # update : update if ve exists, otherwise create, then use - # create : use if ve exists, otherwise create, then use - # use : use if ve exists, otherwise error, then exit - # recreate: delete if ve exists, otherwise create, then use - # local : use the client virtualenv (assumes same FS) - # - # examples : - # virtenv@v0.20 - # virtenv@devel - # virtenv@release - # virtenv@installed - # stage@local - # stage@/tmp/my_agent.py - # - # Note that some combinations may be invalid, - # specifically in the context of virtenv_mode. If, for - # example, virtenv_mode is 'use', then the 'virtenv:tag' - # will not make sense, as the virtenv is not updated. - # In those cases, the virtenv_mode is honored, and - # a warning is printed. - # - # Also, the 'stage' mode can only be combined with the - # 'local' source, or with a path to the agent (relative - # to root_dir, or absolute). - # - # A rp_version which does not adhere to the - # above syntax is ignored, and the fallback stage@local - # is used. - - if not rp_version: - if virtenv_mode == 'local': rp_version = 'installed' - else : rp_version = DEFAULT_RP_VERSION - - if not rp_version.startswith('@') and \ - rp_version not in ['installed', 'local', 'release']: - raise ValueError("invalid rp_version '%s'" % rp_version) - - if rp_version.startswith('@'): - rp_version = rp_version[1:] # strip '@' - - # use local VE ? 
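The 'local VE' branch below reuses the client's own environment when `virtenv_mode` is `local`. The detection itself is plain environment inspection and reads the same standalone:

    import os

    # prefer an active virtualenv, fall back to an active conda env
    if os.environ.get('VIRTUAL_ENV'):
        python_dist, virtenv = 'default',  os.environ['VIRTUAL_ENV']
    elif os.environ.get('CONDA_PREFIX'):
        python_dist, virtenv = 'anaconda', os.environ['CONDA_PREFIX']
    else:
        raise ValueError('no local env found')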
- if virtenv_mode == 'local': - if os.environ.get('VIRTUAL_ENV'): - python_dist = 'default' - virtenv = os.environ['VIRTUAL_ENV'] - elif os.environ.get('CONDA_PREFIX'): - python_dist = 'anaconda' - virtenv = os.environ['CONDA_PREFIX'] - else: - # we can't use local - self._log.error('virtenv_mode is local, no local env found') - raise ValueError('no local env found') - - # ---------------------------------------------------------------------- - # sanity checks - RE = RuntimeError - if not python_dist : raise RE("missing python distribution") - if not virtenv_dist : raise RE("missing virtualenv distribution") - if not agent_spawner : raise RE("missing agent spawner") - if not agent_scheduler : raise RE("missing agent scheduler") - if not resource_manager : raise RE("missing resource manager") - - # massage some values - if not queue: - queue = default_queue - - if cleanup and isinstance(cleanup, bool): - # l : log files - # u : task work dirs - # v : virtualenv - # e : everything (== pilot sandbox) - cleanup = 'luve' - - # we never cleanup virtenvs which are not private - if virtenv_mode != 'private': - cleanup = cleanup.replace('v', '') - - # estimate requested resources - smt = os.environ.get('RADICAL_SAGA_SMT') - - if smt and system_architecture.get('smt'): - system_architecture['smt'] = int(smt) - - if smt and cores_per_node: - cores_per_node *= int(smt) - - avail_cores_per_node = cores_per_node - avail_gpus_per_node = gpus_per_node - - if avail_cores_per_node and blocked_cores: - avail_cores_per_node -= len(blocked_cores) - assert (avail_cores_per_node > 0) - - if avail_gpus_per_node and blocked_gpus: - avail_gpus_per_node -= len(blocked_gpus) - assert (avail_gpus_per_node >= 0) - - if requested_nodes or not requested_cores: - - if not cores_per_node: - raise RE('use "cores" in PilotDescription') - - requested_nodes = requested_nodes or 1 - requested_cores = requested_nodes * avail_cores_per_node - requested_gpus = requested_nodes * avail_gpus_per_node - - else: - - if avail_cores_per_node: - requested_nodes = requested_cores / avail_cores_per_node - - if avail_gpus_per_node: - requested_nodes = max(requested_gpus / avail_gpus_per_node, - requested_nodes) - - requested_nodes = math.ceil(requested_nodes) - - self._log.debug('nodes: %s [%s %s], cores: %s, gpus: %s', - requested_nodes, cores_per_node, gpus_per_node, - requested_cores, requested_gpus) - - # set mandatory args - bootstrap_args = "" - - # add dists to staging files, if needed: - # don't stage on `rp_version==installed` or `virtenv_mode==local` - if rp_version == 'installed' or \ - virtenv_mode == 'local' : - sdist_names = list() - sdist_paths = list() - else: - sdist_names = [rg.sdist_name, - ru.sdist_name, - rs.sdist_name, - self._rp_sdist_name] - sdist_paths = [rg.sdist_path, - ru.sdist_path, - rs.sdist_path, - self._rp_sdist_path] - bootstrap_args += " -d '%s'" % (':'.join(sdist_names)) - - bootstrap_args += " -p '%s'" % pid - bootstrap_args += " -s '%s'" % sid - bootstrap_args += " -m '%s'" % virtenv_mode - bootstrap_args += " -r '%s'" % rp_version - bootstrap_args += " -b '%s'" % python_dist - bootstrap_args += " -g '%s'" % virtenv_dist - bootstrap_args += " -v '%s'" % virtenv - bootstrap_args += " -y '%d'" % runtime - bootstrap_args += " -z '%s'" % tar_name - - # set optional args - if resource_manager == "CCM": bootstrap_args += " -c" - if forward_tunnel_endpoint: bootstrap_args += " -f '%s'" % forward_tunnel_endpoint - if forward_tunnel_endpoint: bootstrap_args += " -h '%s'" % hostport - if python_interpreter: 
bootstrap_args += " -i '%s'" % python_interpreter - if tunnel_bind_device: bootstrap_args += " -t '%s'" % tunnel_bind_device - if cleanup: bootstrap_args += " -x '%s'" % cleanup - - for arg in services: - bootstrap_args += " -j '%s'" % arg - for arg in pre_bootstrap_0: - bootstrap_args += " -e '%s'" % arg - for arg in pre_bootstrap_1: - bootstrap_args += " -w '%s'" % arg - - agent_cfg['owner'] = 'agent.0' - agent_cfg['resource'] = resource - agent_cfg['nodes'] = requested_nodes - agent_cfg['cores'] = requested_cores - agent_cfg['gpus'] = requested_gpus - agent_cfg['spawner'] = agent_spawner - agent_cfg['scheduler'] = agent_scheduler - agent_cfg['runtime'] = runtime - agent_cfg['app_comm'] = app_comm - agent_cfg['service_url'] = service_url - agent_cfg['sid'] = sid - agent_cfg['pid'] = pid - agent_cfg['pmgr'] = self._pmgr - agent_cfg['logdir'] = '.' - agent_cfg['pilot_sandbox'] = pilot_sandbox - agent_cfg['session_sandbox'] = session_sandbox - agent_cfg['resource_sandbox'] = resource_sandbox - agent_cfg['resource_manager'] = resource_manager - agent_cfg['cores_per_node'] = cores_per_node - agent_cfg['gpus_per_node'] = gpus_per_node - agent_cfg['lfs_path_per_node'] = lfs_path_per_node - agent_cfg['lfs_size_per_node'] = lfs_size_per_node - agent_cfg['task_tmp'] = task_tmp - agent_cfg['task_pre_launch'] = task_pre_launch - agent_cfg['task_pre_exec'] = task_pre_exec - agent_cfg['task_pre_rank'] = task_pre_rank - agent_cfg['task_post_launch'] = task_post_launch - agent_cfg['task_post_exec'] = task_post_exec - agent_cfg['task_post_rank'] = task_post_rank - agent_cfg['resource_cfg'] = copy.deepcopy(rcfg) - agent_cfg['debug'] = self._log.getEffectiveLevel() - - # pilot needs to know where to fetch tasks from and where to - # send tasks back to - agent_cfg['client_comm'] = { - 'agent_staging_input_pubsub': - ru.read_json('%s/agent_staging_input_pubsub.cfg' % self._cfg.path), - 'tmgr_staging_output_queue': - ru.read_json('%s/tmgr_staging_output_queue.cfg' % self._cfg.path), - 'control_pubsub': - ru.read_json('%s/control_pubsub.cfg' % self._cfg.path), - } - - - # we'll also push the agent config into MongoDB - pilot['cfg'] = agent_cfg - - # ---------------------------------------------------------------------- - # Write agent config dict to a json file in pilot sandbox. 
- - agent_cfg_name = 'agent.0.cfg' - cfg_tmp_handle, cfg_tmp_file = tempfile.mkstemp(prefix='rp.agent_cfg.') - os.close(cfg_tmp_handle) # file exists now - - # Convert dict to json file - self._log.debug("Write agent cfg to '%s'.", cfg_tmp_file) - agent_cfg.write(cfg_tmp_file) - - # always stage agent cfg for each pilot, not in the tarball - # FIXME: purge the tmp file after staging - self._log.debug('cfg %s -> %s', agent_cfg['pid'], pilot_sandbox) - ret['sds'].append({'source': cfg_tmp_file, - 'target': '%s/%s' % (pilot['pilot_sandbox'], agent_cfg_name), - 'action': rpc.TRANSFER}) - - # always stage the bootstrapper for each pilot, not in the tarball - # FIXME: this results in many staging ops for many pilots - bootstrapper_path = os.path.abspath("%s/agent/bootstrap_0.sh" - % self._root_dir) - ret['sds'].append({'source': bootstrapper_path, - 'target': '%s/bootstrap_0.sh' % pilot['pilot_sandbox'], - 'action': rpc.TRANSFER}) - - # always stage RU env helper - env_helper = ru.which('radical-utils-env.sh') - assert(env_helper) - self._log.debug('env %s -> %s', env_helper, pilot_sandbox) - ret['sds'].append({'source': env_helper, - 'target': '%s/%s' % (pilot['pilot_sandbox'], - os.path.basename(env_helper)), - 'action': rpc.TRANSFER}) - - # ---------------------------------------------------------------------- - # we also touch the log and profile tarballs in the target pilot sandbox - ret['fts'].append({'src': '/dev/null', - 'tgt': '%s/%s' % (pilot_sandbox, '%s.log.tgz' % pid), - 'rem': False}) # don't remove /dev/null - # only stage profiles if we profile - if self._prof.enabled: - ret['fts'].append({ - 'src': '/dev/null', - 'tgt': '%s/%s' % (pilot_sandbox, '%s.prof.tgz' % pid), - 'rem': False}) # don't remove /dev/null - - # check if we have a sandbox cached for that resource. If so, we have - # nothing to do. Otherwise we create the sandbox and stage the RP - # stack etc. - # - # NOTE: this will race when multiple pilot launcher instances are used! 
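The NOTE above flags a race when several launcher instances populate the sandbox cache at once. Within a single process, guarding the check-and-set with a lock closes that window; a sketch (hedged: racers in separate processes would additionally need a filesystem-level guard):

    import threading

    _sandboxes = dict()
    _sb_lock   = threading.Lock()

    def ensure_sandbox(resource, stage_fn):
        # stage the RP stack at most once per resource, even with
        # concurrent callers in this process
        with _sb_lock:
            if resource not in _sandboxes:
                stage_fn(resource)
                _sandboxes[resource] = True

    ensure_sandbox('hpc.example.org', lambda r: None)    # placeholder staging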
- # - if resource not in self._sandboxes: - - for sdist in sdist_paths: - base = os.path.basename(sdist) - ret['fts'].append({ - 'src': sdist, - 'tgt': '%s/%s' % (session_sandbox, base), - 'rem': False - }) - - self._sandboxes[resource] = True - - # ---------------------------------------------------------------------- - # Create SAGA Job description and submit the pilot job - - total_cpu_count = (requested_nodes * cores_per_node) or requested_cores - total_gpu_count = (requested_nodes * gpus_per_node) or requested_gpus - - jd = rs.job.Description() - - jd.name = job_name - jd.executable = '/bin/bash' - jd.arguments = ['-l ./bootstrap_0.sh %s' % bootstrap_args] - jd.working_directory = pilot_sandbox - jd.project = project - jd.output = 'bootstrap_0.out' - jd.error = 'bootstrap_0.err' - jd.total_cpu_count = total_cpu_count - jd.total_gpu_count = total_gpu_count - jd.total_physical_memory = requested_memory - jd.processes_per_host = avail_cores_per_node - jd.spmd_variation = spmd_variation - jd.wall_time_limit = runtime - jd.queue = queue - jd.candidate_hosts = candidate_hosts - jd.environment = dict() - jd.system_architecture = dict(system_architecture) - - # register used resources in DB (enacted on next advance) - pilot['resources'] = {'cpu': requested_cores, - 'gpu': requested_gpus} - pilot['$set'] = ['resources'] - - # we set any saga_jd_supplement keys which are not already set above - for key, val in saga_jd_supplement.items(): - if not jd[key]: - self._log.debug('supplement %s: %s', key, val) - jd[key] = val - - # job description environment variable(s) setup - - if self._prof.enabled: - jd.environment['RADICAL_PROFILE'] = 'TRUE' - - jd.environment['RADICAL_BASE'] = resource_sandbox - - self._log.debug("Bootstrap command line: %s %s", jd.executable, jd.arguments) - - ret['jd'] = jd - return ret - - - # -------------------------------------------------------------------------- - # - def _stage_in(self, pilot, sds): - ''' - Run some input staging directives. - ''' - - resource_sandbox = self._session._get_resource_sandbox(pilot) - # session_sandbox = self._session._get_session_sandbox (pilot) - pilot_sandbox = self._session._get_pilot_sandbox (pilot) - client_sandbox = self._session._get_client_sandbox() - - # contexts for staging url expansion - rem_ctx = {'pwd' : pilot_sandbox, - 'client' : client_sandbox, - 'pilot' : pilot_sandbox, - 'resource': resource_sandbox} - - loc_ctx = {'pwd' : client_sandbox, - 'client' : client_sandbox, - 'pilot' : pilot_sandbox, - 'resource': resource_sandbox} - - sds = ru.as_list(sds) - - for sd in sds: - sd['prof_id'] = pilot['uid'] - sd['source'] = str(complete_url(sd['source'], loc_ctx, self._log)) - sd['target'] = str(complete_url(sd['target'], rem_ctx, self._log)) - - self._stage(sds) - - - # -------------------------------------------------------------------------- - # - def _stage_out(self, pilot, sds): - ''' - Run some output staging directives. 
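`_stage` further down in this (now removed) launcher waits for staging acknowledgements by polling a shared state dict once per second. An event-based variant avoids the fixed poll interval; a simplified sketch with a single directive (hedged, not the project's code):

    import threading

    done  = threading.Event()
    state = {'sd.0000': 'NEW'}                  # placeholder directive state

    def ack_cb(uid, new_state):
        # would be called from the staging-response subscriber thread
        state[uid] = new_state
        if all(s != 'NEW' for s in state.values()):
            done.set()

    ack_cb('sd.0000', 'DONE')                   # simulate a staging_result msg
    done.wait(timeout=60)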
- ''' - - resource_sandbox = self._session._get_resource_sandbox(pilot) - # session_sandbox = self._session._get_session_sandbox (pilot) - pilot_sandbox = self._session._get_pilot_sandbox (pilot) - client_sandbox = self._session._get_client_sandbox() - - # contexts for staging url expansion - loc_ctx = {'pwd' : client_sandbox, - 'client' : client_sandbox, - 'pilot' : pilot_sandbox, - 'resource': resource_sandbox} - - rem_ctx = {'pwd' : pilot_sandbox, - 'client' : client_sandbox, - 'pilot' : pilot_sandbox, - 'resource': resource_sandbox} - - sds = ru.as_list(sds) - - for sd in sds: - sd['prof_id'] = pilot['uid'] - - for sd in sds: - sd['source'] = str(complete_url(sd['source'], rem_ctx, self._log)) - sd['target'] = str(complete_url(sd['target'], loc_ctx, self._log)) - - self._stage(sds) - - - # -------------------------------------------------------------------------- - # - def _stage(self, sds): - - # add uid, ensure its a list, general cleanup - sds = expand_staging_directives(sds) - uids = [sd['uid'] for sd in sds] - - # prepare to wait for completion - with self._sds_lock: - - self._active_sds = dict() - for sd in sds: - sd['state'] = rps.NEW - self._active_sds[sd['uid']] = sd - - sd_states = [sd['state'] for sd - in list(self._active_sds.values()) - if sd['uid'] in uids] - - # push them out - self._stager_queue.put(sds) - - while rps.NEW in sd_states: - time.sleep(1.0) - with self._sds_lock: - sd_states = [sd['state'] for sd - in list(self._active_sds.values()) - if sd['uid'] in uids] - - if rps.FAILED in sd_states: - raise RuntimeError('pilot staging failed') - - - # -------------------------------------------------------------------------- - # - def _staging_ack_cb(self, topic, msg): - ''' - update staging directive state information - ''' - - cmd = msg.get('cmd') - arg = msg.get('arg') - - if cmd == 'staging_result': - - with self._sds_lock: - for sd in arg['sds']: - if sd['uid'] in self._active_sds: - self._active_sds[sd['uid']]['state'] = sd['state'] - - return True - - -# ------------------------------------------------------------------------------ diff --git a/src/radical/pilot/proxy.py b/src/radical/pilot/proxy.py index b3784f7432..9f0c096407 100644 --- a/src/radical/pilot/proxy.py +++ b/src/radical/pilot/proxy.py @@ -235,7 +235,7 @@ def _worker(self, sid, q, term): proxy_cp = None proxy_sp = None - proxy_aq = None + proxy_tq = None try: proxy_cp = ru.zmq.PubSub(channel='proxy_control_pubsub', @@ -250,7 +250,7 @@ def _worker(self, sid, q, term): 'log_lvl': 'debug', 'path' : sid}) - proxy_aq = ru.zmq.Queue (channel='proxy_task_queue', + proxy_tq = ru.zmq.Queue (channel='proxy_task_queue', cfg={'uid' : 'proxy_task_queue', 'type' : 'queue', 'log_lvl': 'debug', @@ -258,14 +258,14 @@ def _worker(self, sid, q, term): proxy_cp.start() proxy_sp.start() - proxy_aq.start() + proxy_tq.start() cfg = {'proxy_control_pubsub': {'addr_pub': str(proxy_cp.addr_pub), 'addr_sub': str(proxy_cp.addr_sub)}, 'proxy_state_pubsub' : {'addr_pub': str(proxy_sp.addr_pub), 'addr_sub': str(proxy_sp.addr_sub)}, - 'proxy_task_queue' : {'addr_put': str(proxy_aq.addr_put), - 'addr_get': str(proxy_aq.addr_get)}} + 'proxy_task_queue' : {'addr_put': str(proxy_tq.addr_put), + 'addr_get': str(proxy_tq.addr_get)}} # inform service about endpoint details q.put(cfg) @@ -282,7 +282,7 @@ def _worker(self, sid, q, term): if proxy_cp: proxy_cp.stop() if proxy_sp: proxy_sp.stop() - if proxy_aq: proxy_aq.stop() + if proxy_tq: proxy_tq.stop() log.info('terminated') diff --git a/src/radical/pilot/utils/component.py 
b/src/radical/pilot/utils/component.py
index fd133724b3..72a5608632 100644
--- a/src/radical/pilot/utils/component.py
+++ b/src/radical/pilot/utils/component.py
@@ -230,6 +230,14 @@ def start(self):
         assert self._thread.is_alive()


+    # --------------------------------------------------------------------------
+    #
+    def wait(self):
+
+        while not self._term.is_set():
+            time.sleep(1)
+
+
     # --------------------------------------------------------------------------
     #
     def _work_loop(self, sync):
@@ -747,8 +755,6 @@ def register_publisher(self, pubsub):
         assert pubsub not in self._publishers

         cfg = self._reg['bridges.%s' % pubsub]
-        import pprint
-        self._log.debug('===>> %s', pprint.pformat(cfg))

         self._publishers[pubsub] = ru.zmq.Publisher(channel=pubsub,
                                                     url=cfg['addr_pub'],
                                                     log=self._log,
diff --git a/src/radical/pilot/utils/component_manager.py b/src/radical/pilot/utils/component_manager.py
new file mode 100644
index 0000000000..f07eb3ddf6
--- /dev/null
+++ b/src/radical/pilot/utils/component_manager.py
@@ -0,0 +1,220 @@
+
+__copyright__ = 'Copyright 2023, The RADICAL-Cybertools Team'
+__license__   = 'MIT'
+
+# pylint: disable=global-statement   # W0603 global `_components`
+
+import os
+import copy
+import time
+
+import threading     as mt
+import radical.utils as ru
+
+from .. import constants as rpc
+from .. import states    as rps
+
+
+# ------------------------------------------------------------------------------
+#
+class ComponentManager(object):
+    '''
+    RP spans a hierarchy of component instances: the application has a pmgr and
+    tmgr, and the tmgr has a staging component and a scheduling component, and
+    the pmgr has a launching component, and components also can have bridges,
+    etc. This ComponentManager centralises the code needed to spawn, manage and
+    terminate such components. Any code which needs to create components should
+    create a ComponentManager instance and pass the required component and
+    bridge layout and configuration. Calling `stop()` on the cmgr will
+    terminate the components and bridges.
+ ''' + + # -------------------------------------------------------------------------- + # + def __init__(self, sid, reg_addr, owner): + + # create a registry client to obtain the session config and to store + # component and bridge configs + + self._sid = sid + self._reg_addr = reg_addr + self._owner = owner + + self._reg = ru.zmq.RegistryClient(url=self._reg_addr) + self._cfg = ru.Config(from_dict=self._reg['cfg']) + self._hb_cfg = ru.Config(from_dict=self._reg['heartbeat']) + + self._uid = ru.generate_id('cmgr.%(item_counter)04d', + ru.ID_CUSTOM, ns=self._sid) + + self._prof = ru.Profiler(self._uid, ns='radical.pilot', + path=self._cfg.path) + self._log = ru.Logger(self._uid, ns='radical.pilot', + path=self._cfg.path) + + self._prof.prof('init2', uid=self._uid, msg=self._cfg.path) + + self._log.debug('cmgr %s (%s)', self._uid, self._owner) + + # component managers listen on the heartbeat pubsub to see if spawned + # components come alive + self._heartbeats = dict() # heartbeats we have seen + ru.write_json(self._cfg.as_dict(), '%s.json' % self.uid) + ru.zmq.Subscriber(channel='heartbeat_pubsub', + topic='heartbeat', + url=self._hb_cfg.addr_sub, + cb=self._hb_msg_cb, + log=self._log, + prof=self._prof) + + + # -------------------------------------------------------------------------- + # + def _hb_msg_cb(self, topic, msg): + + self._heartbeats[msg['uid']] = time.time() + + + # -------------------------------------------------------------------------- + # + def _wait_startup(self, uids, timeout): + ''' + Wait for the first heartbeat of the given component UIDs to appear. If + that does not happen before timeout, an exception is raised. + ''' + + start = time.time() + ok = list() + nok = uids + while True: + + self._log.debug('wait for : %s', nok) + + ok = [uid for uid in uids if uid in self._heartbeats] + nok = [uid for uid in uids if uid not in ok] + + if len(ok) == len(uids): + break + + if time.time() - start > timeout: + self._log.debug('wait failed: %s', nok) + raise RuntimeError('uids %s not found' % nok) + + time.sleep(0.25) + + + # -------------------------------------------------------------------------- + # + @property + def uid(self): + return self._uid + + + # -------------------------------------------------------------------------- + # + def start_bridges(self, bridges): + + self._prof.prof('start_bridges_start', uid=self._uid) + + buids = list() + for bname, bcfg in bridges.items(): + + uid = bname + buids.append(uid) + + bcfg.uid = uid + bcfg.channel = bname + bcfg.cmgr = self.uid + bcfg.owner = self._owner + bcfg.sid = self._cfg.sid + bcfg.path = self._cfg.path + bcfg.reg_addr = self._cfg.reg_addr + bcfg.heartbeat = self._hb_cfg + + self._reg['bridges.%s.cfg' % bname] = bcfg + + # self._reg.put('bridge.%s' % bname, bcfg) + + self._log.info('create bridge %s [%s]', bname, bcfg.uid) + + cmd = 'radical-pilot-bridge %s %s %s' \ + % (self._sid, self._reg.url, bname) + out, err, ret = ru.sh_callout(cmd, cwd=self._cfg.path) + + self._log.debug('bridge startup out: %s', out) + self._log.debug('bridge startup err: %s', err) + + if ret: + raise RuntimeError('bridge startup failed') + + self._heartbeats[bname] = None + self._log.info('created bridge %s [%s]', bname, bname) + + # all bridges are started, wait for their heartbeats + self._log.debug('wait for %s', buids) + self._wait_startup(buids, timeout=self._hb_cfg.timeout) + + self._prof.prof('start_bridges_stop', uid=self._uid) + + + # -------------------------------------------------------------------------- + # + def 
start_components(self, components, cfg = None):
+
+        self._prof.prof('start_components_start', uid=self._uid)
+
+        cuids = list()
+        for cname, ccfg in components.items():
+
+            for _ in range(ccfg.get('count', 1)):
+
+                uid = ru.generate_id(cname + '.%(item_counter)04d',
+                                     ru.ID_CUSTOM, ns=self._sid)
+                cuids.append(uid)
+
+                ccfg.uid       = uid
+                ccfg.kind      = cname
+                ccfg.owner     = self._owner
+                ccfg.sid       = self._cfg.sid
+                ccfg.cmgr      = self._cfg.uid
+                ccfg.base      = self._cfg.base
+                ccfg.path      = self._cfg.path
+                ccfg.reg_addr  = self._cfg.reg_addr
+                ccfg.proxy_url = self._cfg.proxy_url
+                ccfg.heartbeat = self._hb_cfg
+
+                if cfg:
+                    ru.dict_merge(ccfg, cfg, ru.OVERWRITE)
+
+                self._reg['components.%s.cfg' % uid] = ccfg
+
+                self._log.info('create component %s [%s]', cname, uid)
+
+                cmd = 'radical-pilot-component %s %s %s' \
+                    % (self._sid, self._reg.url, uid)
+                out, err, ret = ru.sh_callout(cmd, cwd=self._cfg.path)
+
+                self._log.debug('component startup out: %s', out)
+                self._log.debug('component startup err: %s', err)
+
+                if ret:
+                    raise RuntimeError('component startup failed')
+
+                self._log.info('created component %s [%s]', cname, uid)
+
+        # all components should start now, wait for heartbeats to appear.
+        self._log.debug('wait for %s', cuids)
+        self._wait_startup(cuids, timeout=self._hb_cfg.timeout)
+
+        self._prof.prof('start_components_stop', uid=self._uid)
+
+
+    # --------------------------------------------------------------------------
+    #
+    def close(self):
+
+        self._prof.prof('close', uid=self._uid)
+
+
+# ------------------------------------------------------------------------------
diff --git a/src/radical/pilot/utils/prof_utils.py b/src/radical/pilot/utils/prof_utils.py
index d9c4f1a8d2..8a47a6dad6 100644
--- a/src/radical/pilot/utils/prof_utils.py
+++ b/src/radical/pilot/utils/prof_utils.py
@@ -435,7 +435,7 @@ def get_hostmap(profile):
     '''
     We abuse the profile combination to also derive a pilot-host map, which
     will tell us on what exact host each pilot has been running.  To do so, we
-    check for the PMGR_ACTIVE advance event in agent.0.prof, and use the NTP
+    check for the PMGR_ACTIVE advance event in agent_0.prof, and use the NTP
     sync info to associate a hostname.
''' # FIXME: This should be replaced by proper hostname logging @@ -471,7 +471,7 @@ def get_hostmap_deprecated(profiles): for row in prof: - if 'agent.0.prof' in pname and \ + if 'agent_0.prof' in pname and \ row[ru.EVENT] == 'advance' and \ row[ru.STATE] == s.PMGR_ACTIVE: hostmap[row[ru.UID]] = host_id From 316e3b43d5882ea22a0faab89a82df9917da46fa Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Fri, 9 Jun 2023 16:23:35 +0200 Subject: [PATCH 062/171] snapshot --- examples/00_getting_started.py | 2 +- examples/01_task_details.py | 2 +- src/radical/pilot/agent/agent_0.py | 188 +++++--------- src/radical/pilot/agent/executing/popen.py | 4 + .../pilot/agent/resource_manager/base.py | 1 - .../pilot/agent/staging_output/default.py | 1 - src/radical/pilot/configs/tmgr_default.json | 2 +- src/radical/pilot/pilot.py | 53 +++- src/radical/pilot/pilot_manager.py | 18 +- src/radical/pilot/raptor/master.py | 7 +- src/radical/pilot/raptor/worker.py | 35 ++- src/radical/pilot/raptor/worker_default.py | 5 +- src/radical/pilot/raptor_tasks.py | 4 +- src/radical/pilot/session.py | 150 ++++++----- src/radical/pilot/task_manager.py | 79 ++++-- src/radical/pilot/tmgr/scheduler/base.py | 4 +- .../pilot/tmgr/staging_input/default.py | 3 + src/radical/pilot/utils/component_manager.py | 1 - src/radical/pilot/worker/update.py | 245 ------------------ 19 files changed, 303 insertions(+), 501 deletions(-) delete mode 100644 src/radical/pilot/worker/update.py diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index 86190780d7..0e4fbb96c8 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -66,7 +66,7 @@ pilot = pmgr.submit_pilots(pdesc) - n = 1 * 1024 # number of tasks to run + n = 1 # number of tasks to run report.header('submit %d tasks' % n) # Register the pilot in a TaskManager object. diff --git a/examples/01_task_details.py b/examples/01_task_details.py index 6fab63c1cb..50072854fc 100755 --- a/examples/01_task_details.py +++ b/examples/01_task_details.py @@ -78,7 +78,7 @@ # Create a workload of tasks. # Each task runs '/bin/date'. - n = 1024 * 1024 # number of tasks to run + n = 1 * 1024 # number of tasks to run report.info('create %d task description(s)\n' % n) tds = list() diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 47fdde9bd2..aca44ca16a 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -31,11 +31,7 @@ class Agent_0(rpu.Worker): the sub-agents die, it will shut down the other sub-agents and itself. This class inherits the rpu.Worker, so that it can use its communication - bridges and callback mechanisms. Specifically, it will pull the tasks from - the proxy comm channels and forwards them to the agent's component network - (see `work()`). It will also watch the proxy pubsub for any commands to be - enacted or forwarded (pilot termination, task cancelation, etc), and will - take care of heartbeat messages to be sent to the client. + bridges and callback mechanisms. 
''' # -------------------------------------------------------------------------- @@ -85,54 +81,6 @@ def __init__(self): # regularly check for lifetime limit self.register_timed_cb(self._check_lifetime, timer=10) - # all set up - connect to proxy to fetch / push tasks - self._connect_proxy() - - - # -------------------------------------------------------------------------- - # - def _connect_proxy(self): - - # at this point the session is up and connected, and it should have - # brought up all communication bridges and components. We are - # ready to rumble! - self.register_subscriber(rpc.CONTROL_PUBSUB, self._check_control) - self.register_subscriber(rpc.STATE_PUBSUB, self._service_state_cb) - - # register the control callback - self.register_subscriber(rpc.PROXY_CONTROL_PUBSUB, - self._proxy_control_cb) - - # proxy state updates - self.register_publisher(rpc.PROXY_STATE_PUBSUB) - self.register_subscriber(rpc.STATE_PUBSUB, self._proxy_state_cb) - - # # write config files for proxy channels - # for p in self._cfg.proxy: - # ru.write_json('%s.cfg' % p, self._cfg.proxy[p]) - - # listen for new tasks from the client - self.register_input(rps.AGENT_STAGING_INPUT_PENDING, - rpc.PROXY_TASK_QUEUE, - qname=self._pid, - cb=self._proxy_input_cb) - - # and forward to agent input staging - self.register_output(rps.AGENT_STAGING_INPUT_PENDING, - rpc.AGENT_STAGING_INPUT_QUEUE) - - # listen for completed tasks to foward to client - self.register_input(rps.TMGR_STAGING_OUTPUT_PENDING, - rpc.AGENT_COLLECTING_QUEUE, - qname='default', - cb=self._proxy_output_cb) - - # and register output - self.register_output(rps.TMGR_STAGING_OUTPUT_PENDING, - rpc.PROXY_TASK_QUEUE) - - # FIXME: register pubsubs - # -------------------------------------------------------------------------- # @@ -251,6 +199,26 @@ def _configure_app_comm(self): # def initialize(self): + # listen for new tasks from the client + self.register_input(rps.AGENT_STAGING_INPUT_PENDING, + rpc.PROXY_TASK_QUEUE, + qname=self._pid, + cb=self._proxy_input_cb) + + # and forward to agent input staging + self.register_output(rps.AGENT_STAGING_INPUT_PENDING, + rpc.AGENT_STAGING_INPUT_QUEUE) + + # listen for completed tasks to foward to client + self.register_input(rps.TMGR_STAGING_OUTPUT_PENDING, + rpc.AGENT_COLLECTING_QUEUE, + qname='default', + cb=self._proxy_output_cb) + + # and register output + self.register_output(rps.TMGR_STAGING_OUTPUT_PENDING, + rpc.PROXY_TASK_QUEUE) + # before we run any tasks, prepare a named_env `rp` for tasks which use # the pilot's own environment, such as raptors env_spec = {'type' : os.environ['RP_VENV_TYPE'], @@ -336,11 +304,12 @@ def finalize(self): 'stdout' : out, 'stderr' : err, 'logfile': log, - 'state' : state} + 'state' : state, + 'forward': True} self._log.debug('push final state update') self._log.debug('update state: %s: %s', state, self._final_cause) - self.publish(rpc.PROXY_STATE_PUBSUB, + self.publish(rpc.STATE_PUBSUB, topic=rpc.STATE_PUBSUB, msg=[pilot]) # tear things down in reverse order @@ -595,16 +564,13 @@ def _check_lifetime(self): # -------------------------------------------------------------------------- # - def _proxy_state_cb(self, topic, msg): - # no need to check - blindly forward all messages to the proxy - self.publish(rpc.PROXY_STATE_PUBSUB, topic=topic, msg=msg) - - - # -------------------------------------------------------------------------- - # - def _proxy_control_cb(self, topic, msg): + def _control_cb(self, _, msg): + ''' + Check for commands on the control pubsub, mainly waiting for RPC + 
requests to handle. + ''' - self._log.debug('proxy control: %s', msg) + self._log.debug('control: %s', msg) cmd = msg['cmd'] arg = msg['arg'] @@ -615,21 +581,19 @@ def _proxy_control_cb(self, topic, msg): if cmd == 'pmgr_heartbeat' and arg['pmgr'] == self._pmgr: - self._hb.beat(uid=self._pmgr) + self._session._hb.beat(uid=self._pmgr) return True - if cmd == 'prep_env': + elif cmd == 'prep_env': env_spec = arg for env_id in env_spec: - # ensure we have a full hb period for the prep_env call - self._hb.beat(uid=self._pmgr) self._prepare_env(env_id, env_spec[env_id]) return True - if cmd == 'cancel_pilots': + elif cmd == 'cancel_pilots': if self._pid not in arg.get('uids'): self._log.debug('ignore cancel %s', msg) @@ -643,73 +607,45 @@ def _proxy_control_cb(self, topic, msg): # work is done - unregister this cb return False - ## if cmd != 'rpc_res': - ## # not an rpc responese, keep cb registered - ## return True - ## - ## if rpc_res['uid'] != rpc_id: - ## # not the right rpc response, keep cb registered - ## return True - # all other messages (such as cancel_tasks) are forwarded to the agent - # control pubsub, to be picked up by the respective target components - self._log.debug('fwd control msg %s', msg) - self.publish(rpc.CONTROL_PUBSUB, msg) - - return True + elif cmd == 'rpc_req': + req = arg['rpc'] + if req not in ['hello', 'prepare_env']: + # we don't handle that request + return True - # -------------------------------------------------------------------------- - # - def _check_control(self, _, msg): - ''' - Check for commands on the control pubsub, mainly waiting for RPC - requests to handle. We handle two types of RPC requests: `hello` for - testing, and `prepare_env` for environment preparation requests. - ''' - - cmd = msg['cmd'] - arg = msg['arg'] + rpc_res = {'uid': arg['uid']} - if cmd != 'rpc_req': - # not an rpc request - return True + try: + if req == 'hello' : + out = 'hello %s' % ' '.join(arg['arg']) - req = arg['rpc'] - if req not in ['hello', 'prepare_env']: - # we don't handle that request - return True + elif req == 'prepare_env': + env_name = arg['arg']['env_name'] + env_spec = arg['arg']['env_spec'] + out = self._prepare_env(env_name, env_spec) - rpc_res = {'uid': arg['uid']} + else: + # unknown command + return True - try: - if req == 'hello' : - out = 'hello %s' % ' '.join(arg['arg']) + # request succeeded - respond with return value + rpc_res['err'] = None + rpc_res['out'] = out + rpc_res['ret'] = 0 - elif req == 'prepare_env': - env_name = arg['arg']['env_name'] - env_spec = arg['arg']['env_spec'] - out = self._prepare_env(env_name, env_spec) + except Exception as e: + # request failed for some reason - indicate error + rpc_res['err'] = repr(e) + rpc_res['out'] = None + rpc_res['ret'] = 1 + self._log.exception('control cmd failed') - else: - # unknown command - return True + # publish the response (success or failure) + self.publish(rpc.CONTROL_PUBSUB, {'cmd': 'rpc_res', + 'arg': rpc_res}) - # request succeeded - respond with return value - rpc_res['err'] = None - rpc_res['out'] = out - rpc_res['ret'] = 0 - - except Exception as e: - # request failed for some reason - indicate error - rpc_res['err'] = repr(e) - rpc_res['out'] = None - rpc_res['ret'] = 1 - self._log.exception('control cmd failed') - - # publish the response (success or failure) - self.publish(rpc.CONTROL_PUBSUB, {'cmd': 'rpc_res', - 'arg': rpc_res}) return True diff --git a/src/radical/pilot/agent/executing/popen.py b/src/radical/pilot/agent/executing/popen.py index 708cb76c23..5a91ba25e3 100644 
--- a/src/radical/pilot/agent/executing/popen.py +++ b/src/radical/pilot/agent/executing/popen.py @@ -541,6 +541,7 @@ def _get_rp_funcs(self): def _get_rp_env(self, task): tid = task['uid'] + td = task['description'] name = task.get('name') or tid sbox = os.path.realpath(task['task_sandbox_path']) @@ -558,6 +559,9 @@ def _get_rp_env(self, task): ret += 'export RP_PILOT_SANDBOX="%s"\n' % self.psbox ret += 'export RP_TASK_SANDBOX="%s"\n' % sbox ret += 'export RP_REGISTRY_ADDRESS="%s"\n' % self._session.reg_addr + ret += 'export RP_CORES_PER_RANK=%d\n' % td['cores_per_rank'] + ret += 'export RP_GPUS_PER_RANK=%d\n' % td['gpus_per_rank'] + # FIXME AM # ret += 'export RP_LFS="%s"\n' % self.lfs ret += 'export RP_GTOD="%s"\n' % self.gtod diff --git a/src/radical/pilot/agent/resource_manager/base.py b/src/radical/pilot/agent/resource_manager/base.py index 77d74dbd2d..ba93af20d1 100644 --- a/src/radical/pilot/agent/resource_manager/base.py +++ b/src/radical/pilot/agent/resource_manager/base.py @@ -336,7 +336,6 @@ def _prepare_launch_methods(self, rm_info): lm_name, lm_cfg, rm_info, self._log, self._prof) except Exception as e: - print(repr(e)) self._log.exception('skip lm %s', lm_name) self._launch_order.remove(lm_name) diff --git a/src/radical/pilot/agent/staging_output/default.py b/src/radical/pilot/agent/staging_output/default.py index a66664d479..5cdeaac5df 100644 --- a/src/radical/pilot/agent/staging_output/default.py +++ b/src/radical/pilot/agent/staging_output/default.py @@ -49,7 +49,6 @@ def initialize(self): self.register_input(rps.AGENT_STAGING_OUTPUT_PENDING, rpc.AGENT_STAGING_OUTPUT_QUEUE, self.work) - # we don't need an output queue -- tasks are picked up via mongodb self.register_output(rps.TMGR_STAGING_OUTPUT_PENDING, rpc.AGENT_COLLECTING_QUEUE) diff --git a/src/radical/pilot/configs/tmgr_default.json b/src/radical/pilot/configs/tmgr_default.json index 67ccdc5762..510a529849 100644 --- a/src/radical/pilot/configs/tmgr_default.json +++ b/src/radical/pilot/configs/tmgr_default.json @@ -20,7 +20,7 @@ "bridges" : { "tmgr_staging_input_queue" : {"kind": "queue" }, "tmgr_scheduling_queue" : {"kind": "queue" }, - "agent_staging_input_pubsub": {"kind": "pubsub"}, + "agent_staging_input_queue" : {"kind": "queue" }, "tmgr_staging_output_queue" : {"kind": "queue" }, "tmgr_unschedule_pubsub" : {"kind": "pubsub"}, diff --git a/src/radical/pilot/pilot.py b/src/radical/pilot/pilot.py index 57640b4687..dcc8117f3d 100644 --- a/src/radical/pilot/pilot.py +++ b/src/radical/pilot/pilot.py @@ -5,6 +5,7 @@ import copy import time +import queue import radical.utils as ru @@ -155,6 +156,18 @@ def __init__(self, pmgr: PilotManager, descr): self._session_sandbox .path = self._session_sandbox .path % expand self._pilot_sandbox .path = self._pilot_sandbox .path % expand + # hook into the control pubsub for rpc handling + self._rpc_queue = queue.Queue() + ctrl_addr_sub = self._session._reg['bridges.control_pubsub.addr_sub'] + ctrl_addr_pub = self._session._reg['bridges.control_pubsub.addr_pub'] + + ru.zmq.Subscriber(rpc.CONTROL_PUBSUB, url=ctrl_addr_sub, + log=self._log, prof=self._prof, + cb=self._control_cb, topic=rpc.CONTROL_PUBSUB) + + self._ctrl_pub = ru.zmq.Publisher(rpc.CONTROL_PUBSUB, url=ctrl_addr_pub, + log=self._log, prof=self._prof) + # -------------------------------------------------------------------------- # @@ -702,18 +715,44 @@ def stage_in(self, sds): # -------------------------------------------------------------------------- # - def rpc(self, rpc, args): + def _control_cb(self, topic, msg): + 
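+        # collect 'rpc_res' replies from the control pubsub and hand them to
+        # the local queue which the blocking `rpc()` call below waits on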
+        cmd = msg['cmd']
+        arg = msg['arg']
+
+        if cmd == 'rpc_res':
+
+            self._log.debug('==== rpc res: %s', arg)
+            self._rpc_queue.put(arg)
+
+
+    # --------------------------------------------------------------------------
+    #
+    def rpc(self, cmd, args):
         '''Remote procedure call.
 
-        Send a pilot command, wait for the response, and return the result.
-        This is basically an RPC into the pilot.
+        Send an RPC command and arguments to the pilot and wait for the
+        response.  This is a synchronous operation at this point, and it is not
+        thread safe to have multiple concurrent RPC calls.
         '''
 
-        # FIXME: MongoDB
-        reply = None
-        # reply = self._session._dbs.pilot_rpc(self.uid, rpc, args)
+        rpc_id  = ru.generate_id('rpc')
+        rpc_req = {'uid' : rpc_id,
+                   'rpc' : cmd,
+                   'tgt' : self._uid,
+                   'arg' : args}
+
+        self._ctrl_pub.put(rpc.CONTROL_PUBSUB, {'cmd': 'rpc_req',
+                                                'arg': rpc_req,
+                                                'fwd': True})
+
+        rpc_res = self._rpc_queue.get()
+        self._log.debug('rpc result: %s', rpc_res['ret'])
+
+        if rpc_res['ret']:
+            raise RuntimeError('rpc failed: %s' % rpc_res['err'])
 
-        return reply
+        return rpc_res['ret']
 
 
     # --------------------------------------------------------------------------
diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py
index e861f45fd1..776d3d62f1 100644
--- a/src/radical/pilot/pilot_manager.py
+++ b/src/radical/pilot/pilot_manager.py
@@ -75,7 +75,7 @@ class PilotManager(rpu.Component):
 
     # --------------------------------------------------------------------------
     #
-    def __init__(self, session, uid=None, cfg='default'):
+    def __init__(self, session, cfg='default'):
         """Creates a new PilotManager and attaches it to the session.
 
         Arguments:
@@ -91,13 +91,8 @@ def __init__(self, session, uid=None, cfg='default'):
         assert session._role == session._PRIMARY, 'pmgr needs primary session'
 
         # initialize the base class (with no intent to fork)
-        if uid:
-            self._reconnect = True
-            self._uid       = uid
-        else:
-            self._reconnect = False
-            self._uid       = ru.generate_id('pmgr.%(item_counter)04d',
-                                             ru.ID_CUSTOM, ns=session.uid)
+        self._uid = ru.generate_id('pmgr.%(item_counter)04d',
+                                   ru.ID_CUSTOM, ns=session.uid)
 
         self._uids    = list()   # known UIDs
         self._pilots  = dict()
@@ -139,11 +134,7 @@ def __init__(self, session, uid=None, cfg='default'):
         self._cmgr.start_bridges(self._cfg.bridges)
         self._cmgr.start_components(self._cfg.components)
 
-        if self._reconnect:
-            self._session._reconnect_pmgr(self)
-            self._reconnect_pilots()
-        else:
-            self._session._register_pmgr(self)
+        self._session._register_pmgr(self)
 
         # The output queue is used to forward submitted pilots to the
         # launching component.
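The `rpc()` call added above implements a simple blocking request/response
pattern over the control pubsub: publish an `rpc_req` message, then wait on a
local queue until `_control_cb` delivers the matching `rpc_res`.  A minimal
standalone sketch of that round trip - it reuses only the `ru.zmq.Publisher`
and `ru.zmq.Subscriber` signatures seen in this patch; the endpoint URLs and
names are illustrative, not RP's actual configuration:

    import queue

    import radical.utils as ru

    CTRL      = 'control_pubsub'
    rpc_queue = queue.Queue()

    def control_cb(topic, msg):
        # only 'rpc_res' replies matter here - ignore other control traffic
        if msg['cmd'] == 'rpc_res':
            rpc_queue.put(msg['arg'])

    # illustrative endpoints - RP resolves the real ones via the registry
    ru.zmq.Subscriber(CTRL, url='tcp://localhost:10002',
                      cb=control_cb, topic=CTRL)
    pub = ru.zmq.Publisher(CTRL, url='tcp://localhost:10001')

    def rpc(cmd, args, tgt):
        # publish the request, then block until a response arrives
        rpc_req = {'uid': ru.generate_id('rpc'),
                   'rpc': cmd,
                   'tgt': tgt,
                   'arg': args}
        pub.put(CTRL, {'cmd': 'rpc_req', 'arg': rpc_req, 'fwd': True})

        rpc_res = rpc_queue.get()
        if rpc_res['ret']:
            raise RuntimeError('rpc failed: %s' % rpc_res['err'])
        return rpc_res['out']   # 'out' carries the result, 'ret' the status

Note that the response is matched purely by arrival order, which is why the
docstring declares concurrent calls unsafe; a thread-safe variant would
correlate responses with requests via `rpc_req['uid']`.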
@@ -647,6 +638,7 @@ def _reconnect_pilots(self): # self.is_valid() + # FIXME MONGODB pilot_docs = self._session._dbs.get_pilots(pmgr_uid=self.uid) with self._pilots_lock: diff --git a/src/radical/pilot/raptor/master.py b/src/radical/pilot/raptor/master.py index b929528f8b..010f595f1f 100644 --- a/src/radical/pilot/raptor/master.py +++ b/src/radical/pilot/raptor/master.py @@ -54,7 +54,8 @@ def __init__(self, cfg: ru.Config = None): self._hb_freq = 10 # check worker heartbetas every n seconds self._hb_timeout = 15 # consider worker dead after 15 seconds - self._session = Session(uid=self._sid) + self._session = Session(uid=self._sid, _reg_addr=self._reg_addr, + _role=Session._DEFAULT) self._rpc_handlers = dict() self.register_rpc_handler('stop', self.stop) @@ -268,6 +269,9 @@ def _control_cb(self, topic, msg): rpc_res['out'] = '' rpc_res['ret'] = 1 + # inform client side + rpc_res['forward'] = True + self.publish(rpc.CONTROL_PUBSUB, {'cmd': 'rpc_res', 'arg': rpc_res}) @@ -381,6 +385,7 @@ def submit_workers(self, descriptions: List[TaskDescription] # the default worker needs it's own task description to derive the # amount of available resources self._reg['raptor.%s.cfg' % self._uid] = td.as_dict() + self._reg.dump('raptor_master') # all workers run in the same sandbox as the master task = dict() diff --git a/src/radical/pilot/raptor/worker.py b/src/radical/pilot/raptor/worker.py index 9534451352..32df4992b1 100644 --- a/src/radical/pilot/raptor/worker.py +++ b/src/radical/pilot/raptor/worker.py @@ -55,16 +55,16 @@ def __init__(self, manager, rank, raptor_id): state_cfg = self._reg['bridges.%s' % rpc.STATE_PUBSUB] ctrl_cfg = self._reg['bridges.%s' % rpc.CONTROL_PUBSUB] - ru.zmq.Subscriber(rpc.STATE_PUBSUB, url=state_cfg['sub'], + ru.zmq.Subscriber(rpc.STATE_PUBSUB, url=state_cfg['addr_sub'], log=self._log, prof=self._prof, cb=self._state_cb, topic=rpc.STATE_PUBSUB) - ru.zmq.Subscriber(rpc.CONTROL_PUBSUB, url=ctrl_cfg['sub'], + ru.zmq.Subscriber(rpc.CONTROL_PUBSUB, url=ctrl_cfg['addr_sub'], log=self._log, prof=self._prof, cb=self._control_cb, topic=rpc.CONTROL_PUBSUB) # we push hertbeat and registration messages on that pubsub also self._ctrl_pub = ru.zmq.Publisher(rpc.CONTROL_PUBSUB, - url=ctrl_cfg['pub'], + url=ctrl_cfg['addr_pub'], log=self._log, prof=self._prof) # let ZMQ settle @@ -144,27 +144,24 @@ def _hb_worker(self): # -------------------------------------------------------------------------- # - def _state_cb(self, topic, msg): + def _state_cb(self, topic, things): - cmd = msg['cmd'] - arg = msg['arg'] - - # general task state updates -- check if our master is affected - if cmd == 'update': + import pprint + self._log.debug('=== msg %s: %s', topic, pprint.pformat(things)) - for thing in ru.as_list(arg): + for thing in ru.as_list(things): - uid = thing['uid'] - state = thing['state'] + uid = thing['uid'] + state = thing['state'] - if uid == self._raptor_id: + if uid == self._raptor_id: - if state in rps.FINAL + [rps.AGENT_STAGING_OUTPUT_PENDING]: - # master completed - terminate this worker - self._log.info('master %s final: %s - terminate', - uid, state) - self.stop() - return False + if state in rps.FINAL + [rps.AGENT_STAGING_OUTPUT_PENDING]: + # master completed - terminate this worker + self._log.info('master %s final: %s - terminate', + uid, state) + self.stop() + return False return True diff --git a/src/radical/pilot/raptor/worker_default.py b/src/radical/pilot/raptor/worker_default.py index e2523856be..a62ed06372 100644 --- a/src/radical/pilot/raptor/worker_default.py +++ 
b/src/radical/pilot/raptor/worker_default.py @@ -47,11 +47,12 @@ def __init__(self, raptor_id : str): cb=self._request_cb) # the master should have stored our own task description in the registry + self._reg.dump('raptor_worker') self._descr = self._reg['raptor.%s.cfg' % self._uid] # keep worker ID and rank - self._n_cores = self._descr.get('cores_per_rank', 1) - self._n_gpus = int(self._descr.get('gpus_per_rank', 0)) + self._n_cores = int(os.environ.get('cores_per_rank', 1)) + self._n_gpus = int(os.environ.get('gpus_per_rank', 0)) # We need to make sure to run only up to `gpn` tasks using a gpu # within that pool, so need a separate counter for that. diff --git a/src/radical/pilot/raptor_tasks.py b/src/radical/pilot/raptor_tasks.py index abaea0cbde..92162cd85f 100644 --- a/src/radical/pilot/raptor_tasks.py +++ b/src/radical/pilot/raptor_tasks.py @@ -66,9 +66,7 @@ def rpc(self, rpc: str, if not self._pilot: raise RuntimeError('not assoigned to a pilot yet, cannot run rpc') - reply = self._session._dbs.pilot_rpc(self._pilot, self.uid, rpc, args) - - return reply + return self._tmgr.pilot_rpc(self._pilot, rpc, args) # ------------------------------------------------------------------------------ diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index ab6bfe0eeb..a8c7e50e55 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -207,7 +207,7 @@ def __init__(self, proxy_url: Optional[str ] = None, else: - self._init_secondary() + self._init_default() # now we have config and uid - initialize base class (saga session) @@ -229,7 +229,7 @@ def __init__(self, proxy_url: Optional[str ] = None, if self._role == self._PRIMARY: self._rep.ok('>>ok\n') - self._reg.dump('sinit.%s' % self._role) + assert(self._reg) # -------------------------------------------------------------------------- @@ -264,6 +264,9 @@ def _init_primary(self): # start bridges and components self._start_components() + # crosswire local channels and proxy channels + self._crosswire_proxy() + # -------------------------------------------------------------------------- # @@ -286,9 +289,10 @@ def _init_agent_0(self): self._start_registry() self._connect_registry() self._start_heartbeat() - self._publish_cfg() self._connect_proxy() + self._publish_cfg() self._start_components() + self._crosswire_proxy() # -------------------------------------------------------------------------- @@ -308,7 +312,7 @@ def _init_agent_n(self): # -------------------------------------------------------------------------- # - def _init_secondary(self): + def _init_default(self): # sub-agents and components connect to an existing registry (owned by # the `primary` session or `agent_0`) and load config settings from @@ -337,6 +341,9 @@ def _start_registry(self): # def _connect_registry(self): + if not self._cfg.reg_addr: + self._cfg.reg_addr = self._reg_addr + if not self._cfg.reg_addr: raise ValueError('session needs a registry address') @@ -577,6 +584,11 @@ def _publish_cfg(self): self._reg['bridges'] = self._cfg.bridges # proxy bridges self._reg['components'] = {} + # if we have proxy channels, publish them in the bridges configs too + if self._proxy_cfg: + for channel in self._proxy_cfg: + self._reg['bridges.%s' % channel] = self._proxy_cfg[channel] + # primary sessions publish all known resource configs under `rcfgs`, the # agent_0 only publishes the *current* resource config under `rcfg`. 
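         # (secondary sessions, sub-agents and components later read these
         # settings back from the registry on startup)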
        if self._role == self._PRIMARY:
@@ -587,8 +599,6 @@ def _publish_cfg(self):
             self._reg['rcfg']  = self._rcfg
             self._reg['rcfgs'] = dict()
 
-        self._reg.dump('published')
-
 
     # --------------------------------------------------------------------------
     #
@@ -623,13 +633,13 @@ def _start_proxy(self):
 
         # the proxy url becomes part of the session cfg
         self._cfg.proxy_url = self._proxy_url
 
-        self._rep.info ('<>ok\n')
@@ -254,8 +259,7 @@ def close(self):
 
             # dump task json
             json = self._task_info
-
-            tgt = '%s/tasks.%s.json' % (self._session.path, self.uid)
+            tgt = '%s/tasks.%s.json' % (self._session.path, self.uid)
 
             ru.write_json(json, tgt)
 
@@ -638,6 +642,51 @@ def remove_pilots(self, pilot_ids, drain=False):
                                       'tmgr'   : self.uid}})
 
 
+    # --------------------------------------------------------------------------
+    #
+    def _control_cb(self, topic, msg):
+
+        cmd = msg['cmd']
+        arg = msg['arg']
+
+        if cmd == 'rpc_res':
+
+            self._log.debug('==== rpc res: %s', arg)
+            self._rpc_queue.put(arg)
+
+
+    # --------------------------------------------------------------------------
+    #
+    def pilot_rpc(self, pid, cmd, args):
+        '''Remote procedure call.
+
+        Send an RPC command and arguments to the pilot and wait for the
+        response.  This is a synchronous operation at this point, and it is not
+        thread safe to have multiple concurrent RPC calls.
+        '''
+
+        if pid not in self._pilots:
+            raise ValueError('tmgr does not know pilot %s' % pid)
+
+        rpc_id  = ru.generate_id('rpc')
+        rpc_req = {'uid' : rpc_id,
+                   'rpc' : cmd,
+                   'tgt' : pid,
+                   'arg' : args}
+
+        self._ctrl_pub.put(rpc.CONTROL_PUBSUB, {'cmd': 'rpc_req',
+                                                'arg': rpc_req,
+                                                'fwd': True})
+
+        rpc_res = self._rpc_queue.get()
+        self._log.debug('rpc result: %s', rpc_res['ret'])
+
+        if rpc_res['ret']:
+            raise RuntimeError('rpc failed: %s' % rpc_res['err'])
+
+        return rpc_res['ret']
+
+
     # --------------------------------------------------------------------------
     #
     def list_units(self):
diff --git a/src/radical/pilot/tmgr/scheduler/base.py b/src/radical/pilot/tmgr/scheduler/base.py
index c235ecdd2d..ca4cf1ba2b 100644
--- a/src/radical/pilot/tmgr/scheduler/base.py
+++ b/src/radical/pilot/tmgr/scheduler/base.py
@@ -306,7 +306,9 @@ def _base_control_cb(self, topic, msg):
                     to_cancel[pid] = list()
                 to_cancel[pid].append(uid)
 
-        dbs = self._session._dbs
+        # FIXME: MongoDB
+        # dbs = self._session._dbs
+        dbs = None
 
         if not dbs:
             # too late, already closing down
diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py
index 9d03fd6bed..8eb8f8599a 100644
--- a/src/radical/pilot/tmgr/staging_input/default.py
+++ b/src/radical/pilot/tmgr/staging_input/default.py
@@ -129,6 +129,9 @@ def _advance_tasks(self, tasks, pid=None, state=None, push=True):
         if not state:
             state = rps.AGENT_STAGING_INPUT_PENDING
 
+        for task in tasks:
+            self._log.debug('=== to PTQ: %s / %s', task['uid'], pid)
+
         # perform and publish state update
         # push to the proxy queue
         self.advance(tasks, state, publish=True, push=push, qname=pid)
diff --git a/src/radical/pilot/utils/component_manager.py b/src/radical/pilot/utils/component_manager.py
index f07eb3ddf6..b6213dfc67 100644
--- a/src/radical/pilot/utils/component_manager.py
+++ b/src/radical/pilot/utils/component_manager.py
@@ -59,7 +59,6 @@ def __init__(self, sid, reg_addr, owner):
         # component managers listen on the heartbeat pubsub to see if spawned
         # components come alive
         self._heartbeats = dict()    # heartbeats we have seen
-        ru.write_json(self._cfg.as_dict(), '%s.json' % self.uid)
 
         ru.zmq.Subscriber(channel='heartbeat_pubsub',
topic='heartbeat', url=self._hb_cfg.addr_sub, diff --git a/src/radical/pilot/worker/update.py b/src/radical/pilot/worker/update.py deleted file mode 100644 index fbe22d3cc1..0000000000 --- a/src/radical/pilot/worker/update.py +++ /dev/null @@ -1,245 +0,0 @@ - -__copyright__ = "Copyright 2016, http://radical.rutgers.edu" -__license__ = "MIT" - - -import time -import pymongo - -import radical.utils as ru - -from .. import utils as rpu -from .. import constants as rpc - -from ..db import DBSession - - -# ------------------------------------------------------------------------------ -# -DEFAULT_BULK_COLLECTION_TIME = 1.0 # seconds -DEFAULT_BULK_COLLECTION_SIZE = 100 # seconds - - -# ------------------------------------------------------------------------------ -# -class Update(rpu.Worker): - ''' - An UpdateWorker pushes Task and Pilot state updates to mongodb. Its instances - compete for update requests on the update_queue. Those requests will be - triplets of collection name, query dict, and update dict. Update requests - will be collected into bulks over some time (BULK_COLLECTION_TIME) and - number (BULK_COLLECTION_SIZE) to reduce number of roundtrips. - ''' - - # -------------------------------------------------------------------------- - # - def __init__(self, cfg, session): - - rpu.Worker.__init__(self, cfg, session) - - - # -------------------------------------------------------------------------- - # - def initialize(self): - - self._sid = self._cfg['sid'] - self._dburl = self._reg['cfg.dburl'] - - # get db handle from a connected, non-primary session - self._dbs = DBSession(self._sid, self._dburl, self._log, connect=True) - self._coll = self._dbs._c - self._bulk = self._coll.initialize_ordered_bulk_op() - self._last = time.time() # time of last bulk push - self._uids = list() # list of collected uids - self._lock = ru.Lock() # protect _bulk - - self._db_bulk_time = self._cfg.db_bulk_time - self._db_bulk_size = self._cfg.db_bulk_size - - self.register_subscriber(rpc.STATE_PUBSUB, self._state_cb) - self.register_timed_cb(self._idle_cb, timer=self._db_bulk_time) - - - # -------------------------------------------------------------------------- - # - @classmethod - def create(cls, cfg, session): - - return cls(cfg, session) - - - # -------------------------------------------------------------------------- - # - def _timed_bulk_execute(self, flush=False): - - # is there anything to execute? 
- if not self._uids: - return True - - now = time.time() - age = now - self._last - - # only push if flush is forced, or when collection time or size - # have been exceeded - if not flush \ - and age < self._db_bulk_time \ - and len(self._uids) < self._db_bulk_size: - return False - - try: - self._bulk.execute() - - except pymongo.errors.OperationFailure as e: - self._log.exception('bulk exec error: %s' % e.details) - raise - - except Exception as e: - self._log.exception('mongodb error: %s', e) - raise - - self._prof.prof('update_pushed', msg='bulk size: %d' % len(self._uids)) - - # for entry in self._uids: - # - # uid = entry[0] - # state = entry[2] - # - # if state: - # self._prof.prof('update_pushed', uid=uid, msg=state) - # else: - # self._prof.prof('update_pushed', uid=uid) - - # empty bulk, refresh state - self._last = now - self._bulk = self._coll.initialize_ordered_bulk_op() - self._uids = list() - - return True - - - # -------------------------------------------------------------------------- - # - def _idle_cb(self): - - with self._lock: - self._timed_bulk_execute() - - return True - - - # -------------------------------------------------------------------------- - # - def _state_cb(self, topic, msg): - ''' - - # FIXME: this documentation is not final, nor does it reflect reality! - - 'msg' is expected to be of the form ['cmd', 'thing'], where 'thing' is - an entity to update in the DB, and 'cmd' specifies the mode of update. - - 'things' are expected to be dicts with a 'type' and 'uid' field. If - either one does not exist, an exception is raised. - - Supported types are: - - - task - - pilot - - supported 'cmds': - - - delete : delete can be delayed until bulk is collected/flushed - - update : update can be delayed until bulk is collected/flushed - - state : update can be delayed until bulk is collected/flushed - only state and state history are updated - - delete_flush: delete is sent immediately (possibly in a bulk) - - update_flush: update is sent immediately (possibly in a bulk) - - state_flush : update is sent immediately (possibly in a bulk) - only state and state history are updated - - flush : flush pending bulk - - The 'thing' can contains '$set' and '$push' fields, which will then be - used as given. For all other fields, we use the following convention: - - - scalar values: use '$set' - - dict values: use '$set' - - list values: use '$push' - - That implies that all potential 'list' types should be defined in the - initial 'thing' insert as such, as (potentially empty) lists. - - For 'cmd' in ['state', 'state_flush'], only the 'uid' and 'state' fields - of the given 'thing' are used, all other fields are ignored. If 'state' - does not exist, an exception is raised. - ''' - - try: - cmd = msg['cmd'] - things = msg['arg'] - - # cmds = ['delete', 'update', 'state', - # 'delete_flush', 'update_flush', 'state_flush', 'flush'] - if cmd not in ['update', 'insert']: - return True - - if cmd == 'insert': - self._dbs.insert_tasks(ru.as_list(things)) - return True - - - # FIXME: we don't have any error recovery -- any failure to update - # state in the DB will thus result in an exception here and tear - # down the module. - for thing in ru.as_list(things): - - # got a new request. Add to bulk (create as needed), - # and push bulk if time is up. 
- uid = thing['uid'] - ttype = thing['type'] - state = thing['state'] - - if 'clone' in uid: - # we don't push clone states to DB - return True - - # self._prof.prof('update_request', msg=state, uid=uid) - - if not state: - # nothing to push - return True - - # create an update document - update_dict = dict() - update_dict['$set'] = dict() - update_dict['$push'] = dict() - - for key,val in thing.items(): - # never set _id, states (to avoid index clash, doubled ops) - if key not in ['_id', 'states', 'cmds']: - update_dict['$set'][key] = val - - # we set state, put (more importantly) we push the state onto - # the 'states' list, so that we can later get state progression - # in sync with the state model, even if they have been pushed - # here out-of-order - update_dict['$push']['states'] = state - - with self._lock: - - # push the update request onto the bulk - self._uids.append([uid, ttype, state]) - self._bulk.find ({'uid' : uid, - 'type': ttype}) \ - .update(update_dict) - - with self._lock: - # attempt a timed update - self._timed_bulk_execute() - - return True - - except: - return False - - -# ------------------------------------------------------------------------------ - From f509c126bb50af3fbbc1b2cef9e04e608a00585b Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 14 Jun 2023 16:13:53 +0200 Subject: [PATCH 063/171] use hb msg class type --- bin/radical-pilot-agent_n | 29 ++-- bin/radical-pilot-bridge | 11 +- bin/radical-pilot-component | 10 +- bin/radical-pilot-worker | 136 ------------------ src/radical/pilot/pilot.py | 2 +- src/radical/pilot/raptor/worker.py | 3 - src/radical/pilot/session.py | 43 ++++-- src/radical/pilot/task_manager.py | 4 +- .../pilot/tmgr/staging_input/default.py | 3 - src/radical/pilot/utils/component_manager.py | 4 +- 10 files changed, 63 insertions(+), 182 deletions(-) delete mode 100755 bin/radical-pilot-worker diff --git a/bin/radical-pilot-agent_n b/bin/radical-pilot-agent_n index f0978724f6..0e27cfbad1 100755 --- a/bin/radical-pilot-agent_n +++ b/bin/radical-pilot-agent_n @@ -15,6 +15,7 @@ import setproctitle as spt import radical.utils as ru import radical.pilot as rp +from radical.pilot.messages import HeartbeatMessage # ------------------------------------------------------------------------------ # @@ -42,10 +43,7 @@ def main(sid, reg_addr, uid): try: prof.prof('comp_start', uid=uid) prof.disable() - if uid == 'agent_0': - wrapped_agent_0(sid, reg_addr, uid, log, prof) - else: - wrapped_agent_n(sid, reg_addr, uid, log, prof) + wrapped_main(sid, reg_addr, uid, log, prof) finally: prof.enable() @@ -58,6 +56,14 @@ def wrapped_main(sid, reg_addr, uid, log, prof): spt.setproctitle('rp.%s' % uid) + term = mt.Event() + reg = ru.zmq.RegistryClient(url=reg_addr) + + hb_cfg = ru.TypedDict(reg['heartbeat']) + cfg = ru.TypedDict(reg['cfg']) + + reg.close() + if uid == 'agent_0': agent = run_agent_0(sid, reg_addr, uid, log, prof) else: @@ -69,14 +75,14 @@ def wrapped_main(sid, reg_addr, uid, log, prof): hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub, log=log, prof=prof) def hb_beat_cb(): - hb_pub.put('heartbeat', msg={'uid': uid}) + hb_pub.put('heartbeat', HeartbeatMessage(uid)) def hb_term_cb(hb_uid): - comp.stop() + agent.stop() term.set() return None - hb = ru.Heartbeat(uid=c_cfg.uid, + hb = ru.Heartbeat(uid=cfg.uid, timeout=hb_cfg.timeout, interval=hb_cfg.interval, beat_cb=hb_beat_cb, @@ -84,12 +90,13 @@ def wrapped_main(sid, reg_addr, uid, log, prof): log=log) hb.start() - # register session heartbeat by beating once - hb.beat(uid=sid) + # 
always watch out for session heartbeat + hb.watch(uid=sid) # react on session heartbeats def hb_sub_cb(topic, msg): - if msg['uid'] == sid: + hb_msg = HeartbeatMessage(from_dict=msg) + if hb_msg.uid == sid: hb.beat(uid=sid) ru.zmq.Subscriber('heartbeat', hb_cfg.addr_sub, @@ -116,8 +123,6 @@ def run_agent_0(sid, reg_addr, uid, log, prof): reg.close() - - agent = rp.Agent_0(a_cfg, session) return agent diff --git a/bin/radical-pilot-bridge b/bin/radical-pilot-bridge index a170f09b2b..d26d78a84a 100755 --- a/bin/radical-pilot-bridge +++ b/bin/radical-pilot-bridge @@ -13,6 +13,8 @@ import setproctitle as spt import radical.utils as ru +from radical.pilot.messages import HeartbeatMessage + # ------------------------------------------------------------------------------ # @@ -117,7 +119,7 @@ def wrapped_main(sid, reg_addr, uid, log, prof): hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub, log=log, prof=prof) def hb_beat_cb(): - hb_pub.put('heartbeat', msg={'uid': uid}) + hb_pub.put('heartbeat', HeartbeatMessage(b_cfg.uid)) def hb_term_cb(hb_uid): bridge.stop() @@ -132,12 +134,13 @@ def wrapped_main(sid, reg_addr, uid, log, prof): log=log) hb.start() - # register session heartbeat by beating once - hb.beat(uid=sid) + # always watch out for session heartbeat + hb.watch(uid=sid) # react on session heartbeats def hb_sub_cb(topic, msg): - if msg['uid'] == sid: + hb_msg = HeartbeatMessage(from_dict=msg) + if hb_msg.uid == sid: hb.beat(uid=sid) ru.zmq.Subscriber('heartbeat', hb_cfg.addr_sub, diff --git a/bin/radical-pilot-component b/bin/radical-pilot-component index fe63098254..8c85a5145a 100755 --- a/bin/radical-pilot-component +++ b/bin/radical-pilot-component @@ -14,6 +14,7 @@ import setproctitle as spt import radical.utils as ru import radical.pilot as rp +from radical.pilot.messages import HeartbeatMessage # ------------------------------------------------------------------------------ # @@ -92,7 +93,7 @@ def wrapped_main(sid, reg_addr, uid, log, prof): hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub, log=log, prof=prof) def hb_beat_cb(): - hb_pub.put('heartbeat', msg={'uid': uid}) + hb_pub.put('heartbeat', HeartbeatMessage(uid)) def hb_term_cb(hb_uid): comp.stop() @@ -107,12 +108,13 @@ def wrapped_main(sid, reg_addr, uid, log, prof): log=log) hb.start() - # register session heartbeat by beating once - hb.beat(uid=sid) + # always watch out for session heartbeat + hb.watch(uid=sid) # react on session heartbeats def hb_sub_cb(topic, msg): - if msg['uid'] == sid: + hb_msg = HeartbeatMessage(from_dict=msg) + if hb_msg.uid == sid: hb.beat(uid=sid) ru.zmq.Subscriber('heartbeat', hb_cfg.addr_sub, diff --git a/bin/radical-pilot-worker b/bin/radical-pilot-worker deleted file mode 100755 index e3d1ea0289..0000000000 --- a/bin/radical-pilot-worker +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python3 - - -import sys -import time - -import threading as mt -import setproctitle as spt - -import radical.utils as ru -import radical.pilot as rp - -dh = ru.DebugHelper() - - -# ------------------------------------------------------------------------------ -# -def main(cfg): - ''' - This thin wrapper starts an RCT task overlay worker. It expects a single - argument: a config to use for the worker's configuration. - That config must contain: - - - 'uid' : UID of worker instance (unique to the hosting session) - - 'path': sandbox for log files etc. 
- - If the config contains a `heartbeat` section, that section must be formatted - as follows: - - { - 'from' : 'uid', - 'addr_pub': 'addr_pub', - 'addr_sub': 'addr_sub', - 'interval': , - 'timeout' : - } - - If that section exists, heartbeats are used to manage the worker's lifetime: - the lifetime of this worker is then dependent on receiving heartbeats from - the given `uid`: after `timeout` seconds of no heartbeats arriving, the - worker will terminate. The worker itself will publish heartbeats every - `interval` seconds on the heartbeat channel under its own uid. - - If the heartbeat section is not present in the config file, the worker's - lifetime is expected to be explicitly managed, i.e., that this wrapper - process hosting the worker is terminated externally. - - The config file may contain other entries which are passed to the worker and - are interpreted by the component implementation. - ''' - - # basic setup: cfg, logger and profiler - log = ru.Logger(name=cfg.uid, ns='radical.pilot', path=cfg.path) - prof = ru.Profiler(name=cfg.uid, ns='radical.pilot', path=cfg.path) - - try: - prof.prof('worker_start', uid=cfg.uid) - prof.disable() - wrapped_main(cfg, log, prof) - except: - prof.enable() - prof.prof('worker_fail', uid=cfg.uid) - finally: - prof.enable() - prof.prof('worker_stop', uid=cfg.uid) - - -def wrapped_main(cfg, log, prof): - - term = mt.Event() - - spt.setproctitle('rp.%s' % cfg.uid) - - # start a non-primary session - session = rp.Session(cfg=cfg, _role=rp.Session._DEFAULT) - - # create the component and begin to work - worker = rp.utils.Component.create(cfg, session) - worker.start() - - # component runs - send heartbeats so that cmgr knows about it - hb_pub = ru.zmq.Publisher ('heartbeat', cfg.heartbeat.addr_pub) - - def hb_beat_cb(): - hb_pub.put('heartbeat', msg={'uid': cfg.uid}) - - def hb_term_cb(hb_uid): - worker.stop() - term.set() - return None - - hb = ru.Heartbeat(uid=cfg.uid, - timeout=cfg.heartbeat.timeout, - interval=cfg.heartbeat.interval, - beat_cb=hb_beat_cb, - term_cb=hb_term_cb, - log=log) - hb.start() - - # register cmgr heartbeat by beating once - hb.beat(uid=cfg.cmgr) - - # record cmgr heartbeats - def hb_sub_cb(topic, msg): - if msg['uid'] == cfg.cmgr: - hb.beat(uid=cfg.cmgr) - - ru.zmq.Subscriber('heartbeat', cfg.heartbeat.addr_sub, - topic='heartbeat', cb=hb_sub_cb, - log=log, prof=prof) - - # all is set up - we can sit idle 'til end of time. 
- while not term.is_set(): - time.sleep(1) - - -# ------------------------------------------------------------------------------ -# -if __name__ == "__main__": - - if len(sys.argv) != 2: - sys.stderr.write('error: invalid arguments\n' - 'usage: %s \n' % sys.argv[0]) - raise RuntimeError('invalid arguments: %s' % sys.argv) - - fname = sys.argv[1] - cfg = ru.Config(path=fname) - path = '%s/%s' % (cfg.path, cfg.uid) - - # NOTE: this script runs as an RP task and will *not* daemonize - - main(cfg) - - -# ------------------------------------------------------------------------------ - diff --git a/src/radical/pilot/pilot.py b/src/radical/pilot/pilot.py index dcc8117f3d..6af1f3a6f4 100644 --- a/src/radical/pilot/pilot.py +++ b/src/radical/pilot/pilot.py @@ -722,7 +722,7 @@ def _control_cb(self, topic, msg): if cmd == 'rpc_res': - self._log.debug('==== rpc res: %s', arg) + self._log.debug('rpc res: %s', arg) self._rpc_queue.put(arg) diff --git a/src/radical/pilot/raptor/worker.py b/src/radical/pilot/raptor/worker.py index 32df4992b1..6b358e4683 100644 --- a/src/radical/pilot/raptor/worker.py +++ b/src/radical/pilot/raptor/worker.py @@ -146,9 +146,6 @@ def _hb_worker(self): # def _state_cb(self, topic, things): - import pprint - self._log.debug('=== msg %s: %s', topic, pprint.pformat(things)) - for thing in ru.as_list(things): uid = thing['uid'] diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index a8c7e50e55..7e0301ab42 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -16,9 +16,10 @@ import radical.saga.filesystem as rsfs import radical.saga.utils.pty_shell as rsup -from . import constants as rpc -from . import utils as rpu -from .proxy import Proxy +from . import constants as rpc +from . import utils as rpu +from .proxy import Proxy +from .messages import HeartbeatMessage # ------------------------------------------------------------------------------ @@ -160,9 +161,6 @@ def __init__(self, proxy_url: Optional[str ] = None, """ self._t_start = time.time() - if uid: self._uid = uid - else : self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE) - self._role = _role self._uid = uid self._cfg = ru.Config(cfg=cfg) @@ -526,34 +524,49 @@ def _start_heartbeat(self): prof=self._prof) - # start the heartbeat monitor, but first define its callbacks + # -------------------------------------- + # start the heartbeat monitor, but first + # define its callbacks def _hb_beat_cb(): # called on every heartbeat: cfg.heartbeat.interval` # publish own heartbeat - self._hb_pub.put('heartbeat', - {'cmd' : 'heartbeat', - 'args': {'uid': self._uid}}) + self._hb_pub.put('heartbeat', HeartbeatMessage(self._uid)) + # also update proxy heartbeat self._proxy.request('heartbeat', {'sid': self._uid}) + # -------------------------------------- + # -------------------------------------- + # called when some entity misses + # heartbeats: `cfg.heartbeat.timeout` def _hb_term_cb(): - # called when some entity misses heartbeats: `cfg.heartbeat.timeout` if self._cmgr: self._cmgr.close() return False + # -------------------------------------- # create heartbeat manager which monitors all components in this session + self._log.debug('=== hb %s from session', self._uid) self._hb = ru.Heartbeat(uid=self._uid, timeout=self._cfg.heartbeat.timeout, interval=self._cfg.heartbeat.interval, beat_cb=_hb_beat_cb, term_cb=_hb_term_cb, log=self._log) + self._hb.start() - # subscribe to heartbeat messages on the pubsub + # -------------------------------------- + # subscribe to heartbeat 
msgs and inform + # self._hb about every heartbeat def _hb_msg_cb(topic, msg): - # inform the heartbeat manager about every received heartbeat - self._hb.beat(msg['uid']) + + hb_msg = HeartbeatMessage(from_dict=msg) + + self._log.debug('msg: %s', msg) + + if hb_msg.uid != self._uid: + self._hb.beat(uid=hb_msg.uid) + # -------------------------------------- ru.zmq.Subscriber(channel='heartbeat_pubsub', topic='heartbeat', @@ -668,7 +681,7 @@ def crosswire_pubsub(self, src, tgt): reg = self._reg url = reg['bridges.%s.addr_pub' % tgt.lower()] - self._log.debug('=== cross %s %s', url, tgt) + self._log.debug('cross %s %s', url, tgt) tgt_pub = ru.zmq.Publisher(channel=tgt, path=path, url=url) diff --git a/src/radical/pilot/task_manager.py b/src/radical/pilot/task_manager.py index b4481d7460..87a63a6c7e 100644 --- a/src/radical/pilot/task_manager.py +++ b/src/radical/pilot/task_manager.py @@ -651,7 +651,7 @@ def _control_cb(self, topic, msg): if cmd == 'rpc_res': - self._log.debug('==== rpc res: %s', arg) + self._log.debug('rpc res: %s', arg) self._rpc_queue.put(arg) @@ -870,8 +870,6 @@ def submit_tasks(self, descriptions): self._rep.progress_tgt(len(descriptions), label='submit') for td in descriptions: - self._rep.progress() - mode = td.mode if mode == RAPTOR_MASTER: diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py index 8eb8f8599a..9d03fd6bed 100644 --- a/src/radical/pilot/tmgr/staging_input/default.py +++ b/src/radical/pilot/tmgr/staging_input/default.py @@ -129,9 +129,6 @@ def _advance_tasks(self, tasks, pid=None, state=None, push=True): if not state: state = rps.AGENT_STAGING_INPUT_PENDING - for task in tasks: - self._log.debug('=== to PTQ: %s / %s', task['uid'], pid) - # perform and publish state update # push to the proxy queue self.advance(tasks, state, publish=True, push=push, qname=pid) diff --git a/src/radical/pilot/utils/component_manager.py b/src/radical/pilot/utils/component_manager.py index b6213dfc67..bbfa75e1e3 100644 --- a/src/radical/pilot/utils/component_manager.py +++ b/src/radical/pilot/utils/component_manager.py @@ -13,6 +13,7 @@ from .. import constants as rpc from .. 
import states as rps +from ..messages import HeartbeatMessage # ------------------------------------------------------------------------------ @@ -71,7 +72,8 @@ def __init__(self, sid, reg_addr, owner): # def _hb_msg_cb(self, topic, msg): - self._heartbeats[msg['uid']] = time.time() + hb_msg = HeartbeatMessage(from_dict=msg) + self._heartbeats[hb_msg.uid] = time.time() # -------------------------------------------------------------------------- From 2fc34ca5e1fc1fa41ff77bdafd6996fc2461be2f Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Wed, 14 Jun 2023 15:43:24 -0400 Subject: [PATCH 064/171] Enable doc testing --- .github/workflows/docs.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index ec443e7c37..beeb110bce 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -9,10 +9,10 @@ name: 'Test Jupyter notebooks' on: push: branches: - - docs/nb_section3 + - feature/nodb_2 pull_request: branches: - - docs/nb_section3 + - feature/nodb_2 # This allows a subsequently queued workflow run to interrupt previous runs concurrency: @@ -43,4 +43,4 @@ jobs: with: python-version: 3.7 notebook-name: ${{ matrix.tutorial }} - notebook-path: 'tutorials' \ No newline at end of file + notebook-path: 'tutorials' From d9982683aa6c60f1698819e71667e01ed6663b57 Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Wed, 14 Jun 2023 15:47:40 -0400 Subject: [PATCH 065/171] Enable testing of all notebooks --- .github/workflows/docs.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index beeb110bce..e06607757f 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -31,11 +31,11 @@ jobs: matrix: tutorial: [ 'configuration.ipynb', - # 'debugging.ipynb', + 'debugging.ipynb', 'describing_tasks.ipynb', 'multiple_pilots.ipynb', - # 'profiling.ipynb', - # 'raptor.ipynb', + 'profiling.ipynb', + 'raptor.ipynb', 'staging_data.ipynb', 'submission.ipynb' ] From 95609c2b5c1ad5e3aa2bac95ce2e8d2f4287e249 Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Wed, 14 Jun 2023 15:56:45 -0400 Subject: [PATCH 066/171] Install the required branch of RU --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4302453f4d..8c3e459f3e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ - -radical.utils>=1.34 +radical.utils @ git+https://github.com/radical-cybertools/radical.utils@use_registry radical.saga>=1.12 radical.gtod setproctitle From 85dc249a2679539de154d59fb287be996db13358 Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Wed, 14 Jun 2023 15:59:33 -0400 Subject: [PATCH 067/171] fix branch name --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8c3e459f3e..6faa31d31b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -radical.utils @ git+https://github.com/radical-cybertools/radical.utils@use_registry +radical.utils @ git+https://github.com/radical-cybertools/radical.utils@feature/use_registry radical.saga>=1.12 radical.gtod setproctitle From 0ffafc5ee819e12062c5e5ee0e99335681d7844f Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 14 Jun 2023 22:55:09 +0200 Subject: [PATCH 068/171] add missing file --- src/radical/pilot/messages.py | 59 +++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 
src/radical/pilot/messages.py diff --git a/src/radical/pilot/messages.py b/src/radical/pilot/messages.py new file mode 100644 index 0000000000..5930c1d405 --- /dev/null +++ b/src/radical/pilot/messages.py @@ -0,0 +1,59 @@ + + +from typing import Optional, Dict, Any + +import radical.utils as ru + + +# ------------------------------------------------------------------------------ +# +class HeartbeatMessage(ru.Message): + + # ------------------------------ + class Payload(ru.TypedDict): + _schema = {'uid': str } + _defaults = {'uid': None } + # ------------------------------ + + _schema = { + 'payload': Payload + } + + _defaults = { + 'msg_type': 'heartbeat', + 'payload' : {} + } + + + + # -------------------------------------------------------------------------- + def __init__(self, uid : Optional[str] = None, + from_dict: Optional[Dict[str, Any]] = None): + ''' + support msg construction and usage like this: + + hb_msg = rp.HeartbeatMessage(uid='foo.1') + assert hb_msg.uid == 'foo.1 + + ''' + + if uid: + from_dict = {'payload': {'uid': uid}} + + super().__init__(from_dict=from_dict) + + + # -------------------------------------------------------------------------- + @property + def uid(self): + return self.payload.uid + + @uid.setter + def uid(self, value): + self.payload.uid = value + + +ru.Message.register_msg_type('heartbeat', HeartbeatMessage) + +# ------------------------------------------------------------------------------ + From 1e39a6a89b8fec53c6a606ace25633739be0d461 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 14 Jun 2023 23:27:31 +0200 Subject: [PATCH 069/171] add missing file --- bin/radical-pilot-agent_0 | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100755 bin/radical-pilot-agent_0 diff --git a/bin/radical-pilot-agent_0 b/bin/radical-pilot-agent_0 new file mode 100755 index 0000000000..ed2d5e2286 --- /dev/null +++ b/bin/radical-pilot-agent_0 @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 + +__copyright__ = "Copyright 2014-2019, http://radical.rutgers.edu" +__license__ = "MIT" + +import radical.pilot as rp + + +# ------------------------------------------------------------------------------ +# +if __name__ == "__main__": + + agent = rp.Agent_0() + agent.start() + agent.wait() + + +# ------------------------------------------------------------------------------ + From 52fa86537913fe12bf098ff533e79fab23662486 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Thu, 15 Jun 2023 08:19:32 +0200 Subject: [PATCH 070/171] dbs cleanup --- src/radical/pilot/pilot_manager.py | 15 ++++++++------- src/radical/pilot/task_manager.py | 26 ++++++++++++++------------ 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py index e5c87128aa..4c26ad3c51 100644 --- a/src/radical/pilot/pilot_manager.py +++ b/src/radical/pilot/pilot_manager.py @@ -638,15 +638,15 @@ def _reconnect_pilots(self): # self.is_valid() # FIXME MONGODB - pilot_docs = self._session._dbs.get_pilots(pmgr_uid=self.uid) + # pilot_docs = self._session._dbs.get_pilots(pmgr_uid=self.uid) - with self._pilots_lock: - for ud in pilot_docs: + # with self._pilots_lock: + # for ud in pilot_docs: - descr = PilotDescription(ud['description']) - pilot = Pilot(pmgr=self, descr=descr) + # descr = PilotDescription(ud['description']) + # pilot = Pilot(pmgr=self, descr=descr) - self._pilots[pilot.uid] = pilot + # self._pilots[pilot.uid] = pilot # -------------------------------------------------------------------------- @@ -830,7 +830,8 @@ def 
cancel_pilots(self, uids=None, _timeout=None): # send the cancellation request to the pilots # FIXME: the cancellation request should not go directly to the DB, but # through the DB abstraction layer... - self._session._dbs.pilot_command('cancel_pilot', [], uids) + # FIXME: MongoDB + # self._session._dbs.pilot_command('cancel_pilot', [], uids) # wait for the cancel to be enacted self.wait_pilots(uids=uids, timeout=_timeout) diff --git a/src/radical/pilot/task_manager.py b/src/radical/pilot/task_manager.py index 87a63a6c7e..ff6cdd3666 100644 --- a/src/radical/pilot/task_manager.py +++ b/src/radical/pilot/task_manager.py @@ -922,19 +922,21 @@ def _reconnect_tasks(self): from .task import Task from .task_description import TaskDescription - task_docs = self._session._dbs.get_tasks(tmgr_uid=self.uid) + # FIXME MongoDB - with self._tasks_lock: - - for doc in task_docs: - - td = TaskDescription(doc['description']) - td.uid = doc['uid'] - - task = Task(tmgr=self, descr=td, origin='client') - task._update(doc, reconnect=True) - - self._tasks[task.uid] = task + # task_docs = self._session._dbs.get_tasks(tmgr_uid=self.uid) + # + # with self._tasks_lock: + # + # for doc in task_docs: + # + # td = TaskDescription(doc['description']) + # td.uid = doc['uid'] + # + # task = Task(tmgr=self, descr=td, origin='client') + # task._update(doc, reconnect=True) + # + # self._tasks[task.uid] = task # -------------------------------------------------------------------------- From 3d4c6f06a111a615e599122faffe2e739a3a9986 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 17 Jul 2023 19:29:33 +0200 Subject: [PATCH 071/171] snapshot --- bin/radical-pilot-bridge | 76 ++++++++------- examples/00_getting_started.py | 2 +- examples/misc/raptor_simple.py | 7 +- src/radical/pilot/agent/agent_0.py | 7 +- src/radical/pilot/agent/bootstrap_0.sh | 2 + src/radical/pilot/pilot.py | 28 ++++-- src/radical/pilot/pilot_manager.py | 33 ++++--- src/radical/pilot/proxy.py | 3 +- src/radical/pilot/raptor_tasks.py | 9 +- src/radical/pilot/session.py | 99 ++++++++++++++++---- src/radical/pilot/utils/component.py | 8 +- src/radical/pilot/utils/component_manager.py | 7 +- 12 files changed, 191 insertions(+), 90 deletions(-) diff --git a/bin/radical-pilot-bridge b/bin/radical-pilot-bridge index d26d78a84a..476cfb9347 100755 --- a/bin/radical-pilot-bridge +++ b/bin/radical-pilot-bridge @@ -97,6 +97,9 @@ def main(sid, reg_addr, uid): # def wrapped_main(sid, reg_addr, uid, log, prof): + sys.stdout = ru.ru_open('/tmp/%s.out' % uid, 'w') + sys.stderr = ru.ru_open('/tmp/%s.err' % uid, 'w') + spt.setproctitle('rp.%s' % uid) term = mt.Event() @@ -114,38 +117,47 @@ def wrapped_main(sid, reg_addr, uid, log, prof): reg.close() bridge.start() - # bridge runs - send heartbeats so that cmgr knows about it - # component runs - send heartbeats so that session knows about it - hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub, log=log, prof=prof) - - def hb_beat_cb(): - hb_pub.put('heartbeat', HeartbeatMessage(b_cfg.uid)) - - def hb_term_cb(hb_uid): - bridge.stop() - term.set() - return None - - hb = ru.Heartbeat(uid=b_cfg.uid, - timeout=hb_cfg.timeout, - interval=hb_cfg.interval, - beat_cb=hb_beat_cb, - term_cb=hb_term_cb, - log=log) - hb.start() - - # always watch out for session heartbeat - hb.watch(uid=sid) - - # react on session heartbeats - def hb_sub_cb(topic, msg): - hb_msg = HeartbeatMessage(from_dict=msg) - if hb_msg.uid == sid: - hb.beat(uid=sid) - - ru.zmq.Subscriber('heartbeat', hb_cfg.addr_sub, - topic='heartbeat', cb=hb_sub_cb, - 
log=log, prof=prof) + if 'pubsub' in uid: + d = ru.zmq.test_pubsub(bridge.channel, bridge.addr_pub, bridge.addr_sub) + print('%.1f' % time.time(), d) + + sys.stdout.flush() + sys.stderr.flush() + + # if hb_cfg: + # + # # bridge runs - send heartbeats so that cmgr knows about it + # # component runs - send heartbeats so that session knows about it + # hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub, log=log, prof=prof) + # + # def hb_beat_cb(): + # hb_pub.put('heartbeat', HeartbeatMessage(b_cfg.uid)) + # + # def hb_term_cb(hb_uid): + # bridge.stop() + # term.set() + # return None + # + # hb = ru.Heartbeat(uid=b_cfg.uid, + # timeout=hb_cfg.timeout, + # interval=hb_cfg.interval, + # beat_cb=hb_beat_cb, + # term_cb=hb_term_cb, + # log=log) + # hb.start() + # + # # always watch out for session heartbeat + # hb.watch(uid=sid) + # + # # react on session heartbeats + # def hb_sub_cb(topic, msg): + # hb_msg = HeartbeatMessage(from_dict=msg) + # if hb_msg.uid == sid: + # hb.beat(uid=sid) + # + # ru.zmq.Subscriber('heartbeat', hb_cfg.addr_sub, + # topic='heartbeat', cb=hb_sub_cb, + # log=log, prof=prof) # all is set up - we can sit idle 'til end of time. while not term.is_set(): diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index 0e4fbb96c8..d1b520707c 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -66,7 +66,7 @@ pilot = pmgr.submit_pilots(pdesc) - n = 1 # number of tasks to run + n = 1024 * 2 # number of tasks to run report.header('submit %d tasks' % n) # Register the pilot in a TaskManager object. diff --git a/examples/misc/raptor_simple.py b/examples/misc/raptor_simple.py index edcbff39de..d8ce99c327 100755 --- a/examples/misc/raptor_simple.py +++ b/examples/misc/raptor_simple.py @@ -41,9 +41,10 @@ def task_state_cb(task, state): tmgr.wait_tasks(task.uid) print('%s [%s]: %s' % (task.uid, task.state, task.stdout)) - raptor.rpc('stop') - tmgr.wait_tasks(raptor.uid) - print('%s [%s]: %s' % (raptor.uid, raptor.state, raptor.stdout)) + # FIXME: MongoDB + # raptor.rpc('stop') + # tmgr.wait_tasks(raptor.uid) + # print('%s [%s]: %s' % (raptor.uid, raptor.state, raptor.stdout)) finally: session.close(download=False) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index aca44ca16a..aec72eda2a 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -199,6 +199,9 @@ def _configure_app_comm(self): # def initialize(self): + # handle pilot commands + self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb) + # listen for new tasks from the client self.register_input(rps.AGENT_STAGING_INPUT_PENDING, rpc.PROXY_TASK_QUEUE, @@ -242,7 +245,7 @@ def initialize(self): 'cpu' : rm_info['cores_per_node'] * n_nodes, 'gpu' : rm_info['gpus_per_node'] * n_nodes}} - self.advance(pilot, publish=True, push=False) + self.advance(pilot, publish=True, push=False, fwd=True) # -------------------------------------------------------------------------- @@ -612,6 +615,7 @@ def _control_cb(self, _, msg): req = arg['rpc'] if req not in ['hello', 'prepare_env']: + # we don't handle that request return True @@ -628,6 +632,7 @@ def _control_cb(self, _, msg): else: # unknown command + self._log.info('ignore rpc command: %s', req) return True # request succeeded - respond with return value diff --git a/src/radical/pilot/agent/bootstrap_0.sh b/src/radical/pilot/agent/bootstrap_0.sh index 734c35e394..c1891e320b 100755 --- a/src/radical/pilot/agent/bootstrap_0.sh +++ 
b/src/radical/pilot/agent/bootstrap_0.sh @@ -1779,6 +1779,8 @@ fi # disable user site packages as those can conflict with our virtualenv export PYTHONNOUSERSITE=True +export RP_PILOT_ID="$PILOT_ID" + env_prep -t env/agent.env # we create a bootstrap_2.sh which sets the environment sub-agents diff --git a/src/radical/pilot/pilot.py b/src/radical/pilot/pilot.py index 6af1f3a6f4..f07634d3a8 100644 --- a/src/radical/pilot/pilot.py +++ b/src/radical/pilot/pilot.py @@ -158,16 +158,18 @@ def __init__(self, pmgr: PilotManager, descr): # hook into the control pubsub for rpc handling self._rpc_queue = queue.Queue() - ctrl_addr_sub = self._session._reg['bridges.control_pubsub.addr_sub'] - ctrl_addr_pub = self._session._reg['bridges.control_pubsub.addr_pub'] + self._ctrl_addr_sub = self._session._reg['bridges.control_pubsub.addr_sub'] + self._ctrl_addr_pub = self._session._reg['bridges.control_pubsub.addr_pub'] - ru.zmq.Subscriber(rpc.CONTROL_PUBSUB, url=ctrl_addr_sub, + ru.zmq.Subscriber(rpc.CONTROL_PUBSUB, url=self._ctrl_addr_sub, log=self._log, prof=self._prof, cb=self._control_cb, topic=rpc.CONTROL_PUBSUB) - self._ctrl_pub = ru.zmq.Publisher(rpc.CONTROL_PUBSUB, url=ctrl_addr_pub, + self._ctrl_pub = ru.zmq.Publisher(rpc.CONTROL_PUBSUB, url=self._ctrl_addr_pub, log=self._log, prof=self._prof) + ru.zmq.test_pubsub(rpc.CONTROL_PUBSUB, self._ctrl_addr_pub, self._ctrl_addr_sub) + # -------------------------------------------------------------------------- # @@ -583,6 +585,15 @@ def wait(self, state=None, timeout=None): def cancel(self): """Cancel the pilot.""" + self._finalize() + + self._pmgr.cancel_pilots(self._uid) + + + # -------------------------------------------------------------------------- + # + def _finalize(self): + # clean connection cache try: for key in self._cache: @@ -592,8 +603,6 @@ def cancel(self): except: pass - self._pmgr.cancel_pilots(self.uid) - # -------------------------------------------------------------------------- # @@ -728,7 +737,7 @@ def _control_cb(self, topic, msg): # -------------------------------------------------------------------------- # - def rpc(self, cmd, args): + def rpc(self, cmd, args=None): '''Remote procedure call. Send am RPC command and arguments to the pilot and wait for the @@ -736,16 +745,19 @@ def rpc(self, cmd, args): thread safe to have multiple concurrent RPC calls. 
        '''
 
+        if not args:
+            args = dict()
+
         rpc_id  = ru.generate_id('rpc')
         rpc_req = {'uid' : rpc_id,
                    'rpc' : cmd,
                    'tgt' : self._uid,
                    'arg' : args}
+
         self._ctrl_pub.put(rpc.CONTROL_PUBSUB, {'cmd': 'rpc_req',
                                                 'arg': rpc_req,
                                                 'fwd': True})
-
         rpc_res = self._rpc_queue.get()
         self._log.debug('rpc result: %s', rpc_res['ret'])
diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py
index bb5f68eea4..8292bb1334 100644
--- a/src/radical/pilot/pilot_manager.py
+++ b/src/radical/pilot/pilot_manager.py
@@ -801,12 +801,14 @@ def _fail_missing_pilots(self):
            cancellation command in due time, if they can
         """
-        with self._pilots_lock:
-            for pid in self._pilots:
-                pilot = self._pilots[pid]
-                if pilot.state not in rps.FINAL:
-                    self.advance(pilot.as_dict(), rps.FAILED,
-                                 publish=True, push=False)
+        pass
+
+        # with self._pilots_lock:
+        #     for pid in self._pilots:
+        #         pilot = self._pilots[pid]
+        #         if pilot.state not in rps.FINAL:
+        #             self.advance(pilot.as_dict(), rps.FAILED,
+        #                          publish=True, push=False)
 
 
     # --------------------------------------------------------------------------
     #
@@ -826,22 +828,25 @@ def cancel_pilots(self, uids=None, _timeout=None):
         if not isinstance(uids, list):
             uids = [uids]
 
-        with self._pilots_lock:
-            for uid in uids:
-                if uid not in self._pilots:
-                    raise ValueError('pilot %s not known' % uid)
-
         self._log.debug('pilot(s) need(s) cancellation %s', uids)
 
         # send the cancellation request to the pilots
-        # FIXME: the cancellation request should not go directly to the DB, but
-        #        through the DB abstraction layer...
         # FIXME: MongoDB
         # self._session._dbs.pilot_command('cancel_pilot', [], uids)
-
+        self._log.debug('=== issue cancel_pilots for %s', uids)
+        self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'cancel_pilots',
+                                          'arg' : {'pmgr' : self.uid,
+                                                   'uids' : uids}})
 
         # wait for the cancel to be enacted
         self.wait_pilots(uids=uids, timeout=_timeout)
 
+        # FIXME: only finalize pilots which actually terminated
+        with self._pilots_lock:
+            for uid in uids:
+                if uid not in self._pilots:
+                    raise ValueError('pilot %s not known' % uid)
+                self._pilots[uid]._finalize()
+
 
     # --------------------------------------------------------------------------
     #
diff --git a/src/radical/pilot/proxy.py b/src/radical/pilot/proxy.py
index 9f0c096407..9e13d4cbb2 100644
--- a/src/radical/pilot/proxy.py
+++ b/src/radical/pilot/proxy.py
@@ -327,7 +327,8 @@ def _heartbeat(self, arg):
 
         with self._lock:
             if sid not in self._clients:
-                raise RuntimeError('client %s not ' % sid)
+                self._log.warn('client %s not in %s', sid, self._clients)
+                return
 
             self._clients[sid]['hb'] = now
diff --git a/src/radical/pilot/raptor_tasks.py b/src/radical/pilot/raptor_tasks.py
index 92162cd85f..ff8d2e4e18 100644
--- a/src/radical/pilot/raptor_tasks.py
+++ b/src/radical/pilot/raptor_tasks.py
@@ -66,7 +66,14 @@ def rpc(self, rpc: str,
         if not self._pilot:
             raise RuntimeError('not assigned to a pilot yet, cannot run rpc')
 
-        return self._tmgr.pilot_rpc(self._pilot, rpc, args)
+        cmd = 'raptor_rpc'
+
+        if not args:
+            args = dict()
+
+        args['raptor_cmd'] = rpc
+
+        return self._tmgr.pilot_rpc(self._pilot, cmd, args)
 
 
 # ------------------------------------------------------------------------------
diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py
index 7e0301ab42..550c6aa00c 100644
--- a/src/radical/pilot/session.py
+++ b/src/radical/pilot/session.py
@@ -141,7 +141,7 @@ def __init__(self, proxy_url: Optional[str ] = None,
             _role (`bool`): only `PRIMARY` sessions created by the original
                 application process (via `rp.Session()`), will create proxies
-                and Registry Serivices. `AGENT` sessions will also create
+                and Registry Services. `AGENT` sessions will also create
                 a Registry but no proxies. All other `DEFAULT` session
                 instances are instantiated internally in processes spawned
                 (directly or indirectly) by the initial session, for example in
@@ -178,6 +178,13 @@ def __init__(self, proxy_url: Optional[str ] = None,
         self._tmgrs  = dict()  # map IDs to tmgr instances
         self._cmgr   = None    # only primary sessions have a cmgr
 
+
+        # this session either lives in the client application or in the
+        # scope of a pilot.  In the latter case we expect `RP_PILOT_ID` to be
+        # set - we derive the session module scope from that env variable.
+        self._module = os.environ.get('RP_PILOT_ID', 'client')
+
+
         # non-primary sessions need a uid!
         if self._role != self._PRIMARY and not self._uid:
             raise ValueError('non-primary session needs UID (%s)' % self._role)
@@ -262,6 +269,13 @@ def _init_primary(self):
         # start bridges and components
         self._start_components()
 
+        # primary session hooks into the control pubsub
+        bcfg = self._reg['bridges.%s' % rpc.CONTROL_PUBSUB]
+        self._ctrl_pub = ru.zmq.Publisher(channel=rpc.CONTROL_PUBSUB,
+                                          url=bcfg['addr_pub'],
+                                          log=self._log,
+                                          prof=self._prof)
+
         # crosswire local channels and proxy channels
         self._crosswire_proxy()
 
@@ -513,6 +527,10 @@ def _start_heartbeat(self):
         self._hb_pubsub.start()
         time.sleep(1)
 
+        ru.zmq.test_pubsub(self._hb_pubsub.channel,
+                           self._hb_pubsub.addr_pub,
+                           self._hb_pubsub.addr_sub)
+
         # fill 'cfg.heartbeat' section
         self._cfg.heartbeat.addr_pub = str(self._hb_pubsub.addr_pub)
         self._cfg.heartbeat.addr_sub = str(self._hb_pubsub.addr_sub)
@@ -533,7 +551,8 @@ def _hb_beat_cb():
             self._hb_pub.put('heartbeat', HeartbeatMessage(self._uid))
 
             # also update proxy heartbeat
-            self._proxy.request('heartbeat', {'sid': self._uid})
+            if self._proxy:
+                self._proxy.request('heartbeat', {'sid': self._uid})
 
         # --------------------------------------
         # --------------------------------------
@@ -675,26 +694,57 @@ def _connect_proxy(self):
 
     # ----------------------------------------------------------------------
-    def crosswire_pubsub(self, src, tgt):
+    def crosswire_pubsub(self, src, tgt, from_proxy):
+
+        # we only forward messages which have either no origin set (in this case
+        # this method sets the origin), or whose origin is the same as
+        # configured when crosswiring the channels (either 'client' or the pilot
+        # ID).
 
         path = self._cfg.path
         reg  = self._reg
 
-        url = reg['bridges.%s.addr_pub' % tgt.lower()]
-        self._log.debug('cross %s %s', url, tgt)
-        tgt_pub = ru.zmq.Publisher(channel=tgt, path=path,
-                                   url=url)
+        url_sub = reg['bridges.%s.addr_sub' % src.lower()]
+        url_pub = reg['bridges.%s.addr_pub' % tgt.lower()]
+
+        self._log.debug('XXX cfg fwd for topic:%s to %s', src, tgt)
+        self._log.debug('XXX cfg fwd for %s to %s', url_sub, url_pub)
+
+        publisher = ru.zmq.Publisher(channel=tgt, path=path, url=url_pub)
 
         def pubsub_fwd(topic, msg):
 
-            if msg.get('fwd'):      # only forward if requested
-                del msg['fwd']      # only forward once
-                tgt_pub.put(tgt, msg)
-                self._log.debug('=== === fwd %s to %s: %s', src, tgt, msg)
+            if 'origin' not in msg:
+                msg['origin'] = self._module
+
+          # self._log.debug('XXX =?= fwd %s to %s: %s [%s - %s]', src, tgt,
+          #                 msg, msg['origin'], self._module)
+
+            if from_proxy:
+
+                # all messages *from* the proxy are forwarded - but not the
+                # ones which originated in *this* module in the first place
+
+                if msg['origin'] == self._module:
+                    self._log.debug('XXX =>! fwd %s to topic:%s: %s', src, tgt, msg)
+
+                else:
+                    self._log.debug('XXX =>> fwd %s to topic:%s: %s', src, tgt, msg)
+                    publisher.put(tgt, msg)
+
             else:
-                self._log.debug('=== =!= fwd %s to %s: %s', src, tgt, msg)
 
-        ru.zmq.Subscriber(channel=src, path=path, cb=pubsub_fwd,
-                          url=reg['bridges.%s.addr_sub' % src.lower()])
+                # *to* proxy: forward all messages which originated in *this*
+                # module
+
+                if msg['origin'] == self._module:
+                    self._log.debug('XXX ==> fwd %s to topic:%s: %s', src, tgt, msg)
+                    publisher.put(tgt, msg)
+
+                else:
+                    self._log.debug('XXX =!> fwd %s to topic:%s: %s', src, tgt, msg)
+
+        ru.zmq.Subscriber(channel=src, topic=src, path=path, cb=pubsub_fwd,
+                          url=url_sub)
 
 
     # --------------------------------------------------------------------------
@@ -726,14 +776,18 @@ def _crosswire_proxy(self):
 
         assert self._role in [self._PRIMARY, self._AGENT_0]
 
         self.crosswire_pubsub(src=rpc.CONTROL_PUBSUB,
-                              tgt=rpc.PROXY_CONTROL_PUBSUB)
+                              tgt=rpc.PROXY_CONTROL_PUBSUB,
+                              from_proxy=False)
         self.crosswire_pubsub(src=rpc.PROXY_CONTROL_PUBSUB,
-                              tgt=rpc.CONTROL_PUBSUB)
+                              tgt=rpc.CONTROL_PUBSUB,
+                              from_proxy=True)
 
         self.crosswire_pubsub(src=rpc.STATE_PUBSUB,
-                              tgt=rpc.PROXY_STATE_PUBSUB)
+                              tgt=rpc.PROXY_STATE_PUBSUB,
+                              from_proxy=False)
         self.crosswire_pubsub(src=rpc.PROXY_STATE_PUBSUB,
-                              tgt=rpc.STATE_PUBSUB)
+                              tgt=rpc.STATE_PUBSUB,
+                              from_proxy=True)
 
 
     # --------------------------------------------------------------------------
@@ -795,6 +849,12 @@ def close(self, **kwargs):
 
         options = self._close_options
 
+        if options.terminate:
+            # terminate all components
+            if self._role == self._PRIMARY:
+                self._ctrl_pub.put(rpc.CONTROL_PUBSUB, {'cmd': 'terminate',
+                                                        'arg': None})
+
         for tmgr_uid, tmgr in self._tmgrs.items():
             self._log.debug("session %s closes tmgr %s", self._uid, tmgr_uid)
             tmgr.close()
@@ -811,8 +871,7 @@ def close(self, **kwargs):
 
         if self._proxy:
             try:
                 self._log.debug("session %s closes service", self._uid)
-                self._proxy.request('unregister',
-                                    {'sid': self._uid})
+                self._proxy.request('unregister', {'sid': self._uid})
             except:
                 pass
diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py
index 72a5608632..fbc312a438 100644
--- a/src/radical/pilot/utils/component.py
+++ b/src/radical/pilot/utils/component.py
@@ -323,7 +323,7 @@ def _cancel_monitor_cb(self, topic, msg):
         # currently have no abstract 'cancel' command, but instead use
         # 'cancel_tasks'.
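         # (illustrative sketch, not part of this patch: such a cancellation
         #  request, as published on the control pubsub, is expected to look
         #  roughly like
         #
         #      {'cmd': 'cancel_tasks', 'arg': {'uids': ['task.000000', ...]}}
         #
         #  mirroring the 'cancel_pilots' message which
         #  PilotManager.cancel_pilots publishes above)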
- # self._log.debug('command incoming: %s', msg) + self._log.debug('command incoming: %s', msg) cmd = msg['cmd'] arg = msg['arg'] @@ -904,7 +904,7 @@ def work_cb(self): # -------------------------------------------------------------------------- # def advance(self, things, state=None, publish=True, push=False, qname=None, - ts=None, prof=True): + ts=None, fwd=False, prof=True): ''' Things which have been operated upon are pushed down into the queues again, only to be picked up by the next component, according to their @@ -990,7 +990,9 @@ def advance(self, things, state=None, publish=True, push=False, qname=None, del thing['$set'] to_publish.append(tmp) - self.publish(rpc.STATE_PUBSUB, {'cmd': 'update', 'arg': to_publish}) + self.publish(rpc.STATE_PUBSUB, {'cmd': 'update', + 'arg': to_publish, + 'fwd': fwd}) # ts = time.time() # for thing in things: diff --git a/src/radical/pilot/utils/component_manager.py b/src/radical/pilot/utils/component_manager.py index bbfa75e1e3..7d181ce143 100644 --- a/src/radical/pilot/utils/component_manager.py +++ b/src/radical/pilot/utils/component_manager.py @@ -135,15 +135,10 @@ def start_bridges(self, bridges): self._reg['bridges.%s.cfg' % bname] = bcfg # self._reg.put('bridge.%s' % bname, bcfg) - - self._log.info('create bridge %s [%s]', bname, bcfg.uid) - cmd = 'radical-pilot-bridge %s %s %s' \ % (self._sid, self._reg.url, bname) - out, err, ret = ru.sh_callout(cmd, cwd=self._cfg.path) - self._log.debug('bridge startup out: %s', out) - self._log.debug('bridge startup err: %s', err) + out, err, ret = ru.sh_callout(cmd, cwd=self._cfg.path) if ret: raise RuntimeError('bridge startup failed') From 9d47f6c2f93ca5e085c2ade1c298e0621891e541 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 17 Jul 2023 23:00:54 +0200 Subject: [PATCH 072/171] fewer logfiles --- src/radical/pilot/configs/tmgr_default.json | 6 +++--- src/radical/pilot/session.py | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/radical/pilot/configs/tmgr_default.json b/src/radical/pilot/configs/tmgr_default.json index 510a529849..6f096a41c6 100644 --- a/src/radical/pilot/configs/tmgr_default.json +++ b/src/radical/pilot/configs/tmgr_default.json @@ -21,10 +21,10 @@ "tmgr_staging_input_queue" : {"kind": "queue" }, "tmgr_scheduling_queue" : {"kind": "queue" }, "agent_staging_input_queue" : {"kind": "queue" }, - "tmgr_staging_output_queue" : {"kind": "queue" }, + "tmgr_staging_output_queue" : {"kind": "queue" } - "tmgr_unschedule_pubsub" : {"kind": "pubsub"}, - "tmgr_reschedule_pubsub" : {"kind": "pubsub"} + # "tmgr_unschedule_pubsub" : {"kind": "pubsub"}, + # "tmgr_reschedule_pubsub" : {"kind": "pubsub"} }, "components" : { diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 1e0868e6a4..f0ceb7df1c 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -710,7 +710,8 @@ def crosswire_pubsub(self, src, tgt, from_proxy): self._log.debug('XXX cfg fwd for topic:%s to %s', src, tgt) self._log.debug('XXX cfg fwd for %s to %s', url_sub, url_pub) - publisher = ru.zmq.Publisher(channel=tgt, path=path, url=url_pub) + publisher = ru.zmq.Publisher(channel=tgt, path=path, url=url_pub, + log=self._log, prof=self._prof) def pubsub_fwd(topic, msg): @@ -744,7 +745,8 @@ def pubsub_fwd(topic, msg): else: self._log.debug('XXX =!> fwd %s to topic:%s: %s', src, tgt, msg) - ru.zmq.Subscriber(channel=src, topic=src, path=path, cb=pubsub_fwd, url=url_sub) + ru.zmq.Subscriber(channel=src, topic=src, path=path, cb=pubsub_fwd, + url=url_sub, 
log=self._log, prof=self._prof) # -------------------------------------------------------------------------- From 6e93dedc8e74cfabf7b9c224671ccbd002e2d9c3 Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Fri, 21 Jul 2023 12:45:54 -0400 Subject: [PATCH 073/171] Remove MDB from documentation --- docs/source/README.md | 2 - docs/source/envs.rst | 3 - docs/source/getting_started.ipynb | 48 +---------- docs/source/images/architecture.png | Bin 59887 -> 86515 bytes docs/source/supported/amarel.rst | 15 +--- docs/source/supported/bridges2.rst | 13 --- docs/source/supported/delta.rst | 13 --- docs/source/supported/frontera.rst | 13 --- docs/source/supported/frontier.rst | 18 +--- docs/source/supported/perlmutter.rst | 14 --- docs/source/supported/polaris.rst | 91 -------------------- docs/source/supported/rivanna.rst | 14 --- docs/source/supported/summit.rst | 18 +--- docs/source/tutorials/configuration.ipynb | 6 -- docs/source/tutorials/multiple_pilots.ipynb | 6 -- docs/source/tutorials/staging_data.ipynb | 6 -- docs/source/tutorials/submission.ipynb | 3 - 17 files changed, 7 insertions(+), 276 deletions(-) diff --git a/docs/source/README.md b/docs/source/README.md index 5da0633889..8dc2a59ff7 100644 --- a/docs/source/README.md +++ b/docs/source/README.md @@ -32,7 +32,6 @@ RP's documentation uses [Sphinx](https://www.sphinx-doc.org/en/master/index.html ```shell cd docs - export RADICAL_PILOT_DBURL= sphinx-build source _build -b html ``` @@ -55,4 +54,3 @@ RP's documentation uses [Sphinx](https://www.sphinx-doc.org/en/master/index.html - Branch name - Requirements File: relative path to requirements file - Documentation Type: Select `Sphinx Html` -- Environment Variable: `RADICAL_PILOT_DBURL` set as private as it contains auth tokens. diff --git a/docs/source/envs.rst b/docs/source/envs.rst index c992ca9f9b..cb062e0460 100644 --- a/docs/source/envs.rst +++ b/docs/source/envs.rst @@ -24,9 +24,6 @@ End user * - .. envvar:: RADICAL_BASE - Root directory where to save temporary state files - `$HOME/.radical/` - * - .. envvar:: RADICAL_PILOT_DBURL - - MongoDB URI string. Mandatory for RP to work - - {NOT_SET} * - .. envvar:: RADICAL_UTILS_NTPHOST - NTP host used for profile syncing - `0.pool.ntp.org` diff --git a/docs/source/getting_started.ipynb b/docs/source/getting_started.ipynb index fab57f1880..b14f80d8ef 100644 --- a/docs/source/getting_started.ipynb +++ b/docs/source/getting_started.ipynb @@ -27,7 +27,7 @@ "\n", "
\n", " \n", - "__Note:__ Please see [using virtual environments](envs.rst) with RP for more options and detailed information. That will be especially useful when executing RP on supported high performance computing [(HPC) platforms](supported.rst).\n", + "__Note:__ Please see [using virtual environments](envs.rst) with RP for more options and detailed information. That will be especially useful when executing RP on [supported high performance computing (HPC) platforms](supported.rst).\n", "\n", "
\n", "\n", @@ -131,46 +131,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## MongoDB\n", - "\n", - "
\n", - " \n", - "__Warning:__ RP 1.40 will **not** require a MongoDB server.\n", - "\n", - "
\n", - "\n", - "RP <1.40 requires a MongoDB server to execute. Without one, RP will throw an error and exit. You have multiple options, depending on where you are executing RP and for what application.\n", - "\n", - "### Executing RADICAL-Pilot on your local GNU/Linux workstation\n", - "\n", - "* [Install MongoDB](https://www.mongodb.com/docs/manual/administration/install-on-linux/) locally.\n", - "* Use the MongoDB default configuration.\n", - "\n", - "### Executing RADICAL-Pilot on a supported HPC platform\n", - "\n", - "[Contact](https://github.com/radical-cybertools/radical.pilot/issues) the RADICAL development team, and we will provide you a viable solution.\n", - "\n", - "### Configuring RADICAL-Pilot to use a MongoDB server\n", - "\n", - "Export the following shell variable in the shell from which you will execute your RP application:\n", - "\n", - "```shell\n", - "export RADICAL_PILOT_DBURL='mongodb://login:password@address:port/db_name'\n", - "```\n", - "\n", - "Where:\n", - "\n", - "* `login`: needed only when using a supported HPC platform.\n", - "* `address`: will be 127.0.0.1 when using RP locally.\n", - "* `port`: will be 27017 when using RP locally; possibly different when using a supported HPC platform.\n", - "* `db_name`: needed only when using a supported HPC platform.\n", - "\n", - "
\n", - " \n", - "__Note:__ When executing a MongoDB locally with a default configuration, you will have to use: `export RADICAL_PILOT_DBURL='mongodb://127.0.0.1:27017`. No `login`/`password` or `db_name` needed.\n", - "\n", - "
\n", - "\n", "## Write your first application\n", "\n", "RP executes in batch mode:\n", @@ -199,7 +159,7 @@ "1. Submit tasks for execution\n", "1. Wait for tasks to complete execution\n", "\n", - "As we have already seen with `RADICAL_PILOT_DBURL`, some of RP behavior can be configured via environment variables. RP's progression bar does not work properly with Jupyter notebooks. Thus, we set it to FALSE." + "Some of RP behavior can be configured via environment variables. RP's progression bar does not work properly with Jupyter notebooks. Thus, you may want to set it to FALSE." ] }, { @@ -231,7 +191,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As with every Python application, first import all the required modules." + "As with every Python application, first you import all the required modules." ] }, { @@ -257,7 +217,7 @@ "source": [ "### Enable user feedback\n", "\n", - "As RP implements a batch programming model, by default, it returns a minimal amount of information. After submitting the tasks for execution, RP will remain silent until all the tasks have completed. In practice, when developing and debugging your application, you will want more feedback. We wrote a reporter module that you can use with RP and all the other RADICAL Cybertools.\n", + "As RP implements a batch programming model, by default, it returns a minimal amount of information. After submitting the tasks for execution, RP will remain silent until all the tasks have completed. In practice, when developing and debugging your application, you will want more feedback. We wrote a reporter module that you can use with RP and all the other RADICAL-Cybertools.\n", "\n", "To use the reporter:\n", "\n", diff --git a/docs/source/images/architecture.png b/docs/source/images/architecture.png index a550067179d6b62259a4bfde118e5693e156a071..1afe03efb054e8a67d88df4d092598ab279b5315 100755 GIT binary patch literal 86515 zcmeEubyQa0+U<)75(1)x(qRD7AYD?50#cIFE!`c`f`Et;0#XX1bT>#NEiEMif;1=% zdY_GP&ez{}?-+O7KQ7~U-h;e*@3r=dC+3{bdZ-|O0}qD`2Zci6$=sAwLZMEkqEP4# zSeWn^yBOSX_&;phn;H%%6z)0Xe`u)ZiRa;}@8&A%j_Pu<0){qLtolYa2F9$eR<>|8 z3MC}!YO8N(VeCj_U~Fn`EljsoRYyl-ZX`^nc1@05&Q`+M%>1Uiy|J>pyo#Z_g(1HY zou~+nkgEVZz{=QBpT^b7(%M15RhaJhxdQMz@@F z@UwET7_zf-(D3rJ>hl?K^IqfP=4YniWas2!W9MSyv0_ z8UDS_*2&)TxN##xHe*X;D`RU%2e_8w@3ppOHjXw9W;Xx9qyK*WFB*W+m6QA1#=qMa zE33avI5TsY2OB4QLt`mt=q5e#Hnsv1_Qv{-HufquHkN

[... remainder of the binary delta for docs/source/images/architecture.png omitted ...]
z!daV}6(C4_2WkuUS2Y4z%Q6TELkk!%P8{kErNa!KrD)4n=EGIw-FK;Feu#Nd#I9(OoqPciUR$^pF`aUIPZ5N{{@5x zh5uHm4S{OqBYf-9st>YJpk}vM6zM2L3LgNVwXIJkfCa-@{$N ze}W&pfGAW19a=>egb5nZY19Tv(!z2F0oGvv@P?*-c_AmjNH;0#=2)w=u?+DYP*Hn7HE zfe^I=#tG9UTB$$HXzY$4SGaS0Lc&iFCj6aO0non%(F3*`2J_d8-CWNSo$6LqR&D@S z4xti%dMM^H-}1k9xW+_845iXyfVu6@X4NqPR`y*t5j%UimA)2WcSP6KvH|_MJD2r= zNs1H=a8boqi47Pbkbx8j&UX=dx``2kg#EFMo}ly6?UFUvV_7VnVQ(P%l+>msX>P$Lfb;F4dcM5E*aNWX_&tP zD31p@s{wl7nu`ugrI)4{goIi}2w&8Ls0XP4X%K_$mDz*)rz2&iq+qM?&^OpKwAsl% zg_3ghisy4DM+DURL!ex*LCvC*E!Dm|>AW!wp^I=d9fHZfy}5eLj-sONfOV$|HnH;_ zuN!{3|M1}jU_8zxRx!FAZwdW-f@f+1unf@mG~7}E_#HxvUWs#MZZ16_I3QENbpE78 zXnY6>kxdI!{Oueaq3OnvQ&DlKD?9fAk<6baLbog;Yr=lLkcO%&F(NRf=>0WB_ymo* zVo>6y@nujGkzP1JP!>>6#qnqx`yrCzn|MwkA)%VXzkcu3p8>~4;dYp$*5)evmf)Ae zKRG^|mzCiPfZvxd32nA#h2sRp5S3QIF|c=1*uiPPSM{S?C4!Dc8L>!%(I@<2Ty z-C>YG5sn7-pyUi(q4|?=-!Tn(J&kV-9_;Kq8@u?gO(?3xTKT3gJmAw)g&jWs0^vgL z2SMiK>!;`@=rOg_m*|q-{#>+}Y_tJu`?q2)zy;{W}93GXi?3 z#G`e>SyWQdfImpyM$SXZJP*j2zN-bgEJ62yn<9~O==F$x4M7W{2BJSa*AnPU7Z)cB zZ$pA7h!T3rQgxh{1-C^Gya)=3|IO4=V>BFx5(b@pfBycJS-B~d)~55+!NEb!fURM) zMNOY2R|3fIzM~2#@Suk#n*(|Jcz400>kt09}gZB|F?0Iq?B$E9;(Ebs#>oCebSIesNUZ*q;Omb-;m5LV@Nd?T02(w!^3Bo3h>? z-G!GEr^d8<20L5(lWwbvS7t<6n|Njk0_MrKG+N>5w2tuPJl&BvPT2OzYC`@>XcBXZ zgQe`D?#KDl@%VfHGzukg_N`!WqHsw@cHOXJt_pdVq0tO{mi=x4)p#zf@W3rQbETww zYqEaZeC=ham)-4C=!H33AdYwnjAr0G;!; z+Q)c z*ss5O^IK~jSs?ygye98HiNI~m6aWKcAY<5~O1km8$n?$+$BtD z&4d57Jv)c`KG}fxG^%5<6)`CInF(&6`}wB()>!nMY0CK*BHQY-Itzaw&mBJF!j|Zg zD0FYb6Q6SET(m6qhX+rD33l6K?#~2@#jSAKuX(2B5H+L)6%SUKlG?xBsmkLlH{N`A z;Y;k3YazUM%}X?EHoDcmkHgW0D8GpA*2T7m{#O4w-C(VGH|!Vl%Kb;DC+sdOmEZE$ zv>brlzY}Fo$1+d6_Zz-A_Py0L)$MV>3et*TcHQ+xc^sXyW_Cm0PPyxi92`OTL<_CN zVymd@EgQV?VAJ5n14a!2mvMKCb*se5FV->kDk4f z`D%4uJk<8LAK}|huf8gsqcGAXprzY67aL0^PId{b4mJao%9^L7Pn}%gr|$auYtH!g zr13Qgb?)r)Jh>`=5@HP91162e{5)rTcv;6))k))%iPLaaBK=V^J||2#Gj{dvFxPMl z=e6QyN;#}Akx#@@TKiI?ZaztEy_`xp)!bEJKNI4?0dqB$#Cj=Uk?E8hZi{m22v-|h zLSS`Iv(R~kS1g8SB(aEykm&w$Yr>!2pLd>$-g#GWCbL4orap!igq;(u!&0)|0!V;- zDYsTKmN~;*GjyAhYP_MG)c)Sv37NRDFte2NJw_>Ak?fs`MZe~(Jmb#rCAtC(V2R*; z{G$^#aXe*4v}Mu>{-4@%zPFT-r+3H0Arw|eDxb1b%_1`mR=dl>_(o7HhcK(0xI!dePZKS-9$PTO1@lJ5Qs?I zwIyUjfOrmDg!3B>h4~A|E{|T%5dUl&!9R1$;TY$c?5S37p@_Q7clI!N{rFOvz`2Om zj0Pj?rX?H03NwyASL`ho_|C9U6>Nr{;igkwAc8d)YkjUg#eqIUHImBj)$LrHMn~x_ zWl0NeFQm1C4y~o9HJ`iEztQI=!`29?Q}J3W2W+^ za@v%#uy16-3EeQ+Ku#B)%fC26@r#v~25iZ#((>lxHv8t*RkGFTtrCCQAQ2lg>Jn+N zQ~}0_rnZqQrjv&1dZc@gu$>+3-LOSI5g;RgrI(UZx=5{0?O?y|0V5t9X5;)zeH<0{ z@U(-?XvD!=>eW(K_C=Xz2kC^93XF=bG`F2#%5QbIjYx};u(~zxMG0U z?eOiY5D5hhs?c#lA@9Soad_Sk&G(3ddT#p2%9r}N*b*CV>+TKEz$w=7dqmpgyN<5j zQtWp)UF^$D!-)yX!!JklQ#m1lqF25$Juv@ZY0N$|>su|)yk`k#`2@aGYyN2*@#7qe z_Q=K_s%+0-C8x{rui9&0GuV+ZHsPyyrB6DLGZ$lFw)o{bwab@Tu6jdLF6&7f|0LhB z`pHkEd?$S?UrhVqF+uWJSn!BB;}s3m_r#zJv^6IK?L+5Y&{OKuzyV2SwN<`>N3~N} z)n)6=&DK@Uf`o)bS`G!wZ-ndoU}9TluTy`PwIP}%zQ_6Z>Y#g(v{-DG>gF`T#RP8e zE_E)gmT|+)O*A`7errEWbm+lyt3j?>6;XTrH56)>bQIf^v!rqLal*YvM~p4F(Xb)d zr+d)VcYmYom9!Rqz#7;4hnN6u+wV>`Ye(^s&oNf5oZOD+hUeK+4iFi%XZA}uyPHUL zcSj)rCod-z{00Z|Qj}AoER)6#%yb}Sg;hs+RJX2pT@$_QyD+ukx>W?Dy3ag~SI1|~ zuWwERbkgsWmWS|%8kIVqHVW|{q-P4v@^VU^!z=7XCX)*!4|OlL^B$e2#)}tdWt@y; zHYeepLbFrKT#*Uhc(OoN#xEZ|eKi9bOFFra1SOw3Iv2GuuP8sQ$r2o(s@K%a6odiy?eS8*e*8r8n0m zD+MrBTYcT{(o3IG;K2iu{@SiAQS;7Y-1JvRs}wiB*0^%vJ%E|xwA*Ug|ES7R^{k4; z%F90w7g-16Y9v-4&T^DEX0XuV=4EFxzz8L-mlvQ-=)jLrzfK7W)_C z@dwn>#xjw~fdI0(ali|=SJ6ES*)}9OQZ!_BQ@QnWujsgA8%qyHh|@-n5vei7yOB-)l8C%cz>DG5s!i) z@(Q)SP?7&KB;q2)zUg8Nl9DY|?E~4r2F1TG+~ND%dp)Ar&UPu7)g&d{UpVt>B#&c! 
zOE*E*MLIUObk>{~hDAf}_^qCLsrnBX_Z8JDT?ip$K$^_j^cGZxD1@tFf+`3%_X#XF z!LUKl*~4qQd9}M!c{^xwtE4KH=gxW5Z4Rx-t~DsFoUX6>bgq>BjUZ*8-kGfaTXo3} z7lqonFu%MSJY2a_{TalwvMC1HcGEYU;jmM?P9CI#CaNBd=Zb{%z_k$8?d(#QO8E=Ut zmw=wvu3wK{ETGxQYz}k?sHpnn)4hIq!)iK!UA)uke5L8~LHeXcyDz(Q@prj%)Tgmy zOl@F*`sBGpT7mWbQMz}vkC43A41-8P`wuBmA18;A2S~hh%QsI=V_HSqk5^rXc zifx79d7+W z^}W^Oz~N$33SN~bf*aO3#r`CYTLh?kYi3o?t&I%Ryl1amDG|a7q;(N<#&$-4(NV$& zTfD4?eeV=QXb&mh;-LnlSnhu`7J-1yEc<%8eq@XDY>#{x4jkQ=`&6NrqNynUMe!8cY=jCl!W<0=U9DX9YZp;YEBse>A3<4~ z7W51Tc~7+axyFC%wcw-Lr`V(aRuVQ~n5dMF^iO{_mD_`jyf+68OQol1tNpV#ER+1J z%rOm5wV%Bv9at+>l=^!1`BQs4Jb{t0Gm;$_Lc@EZ?#NvB8;Klr8p@x>YPjq@!b-!2 zC#0Vkq#AkE?`fL!l5jcCWF!F_)XGJ;Ib6Ez5&TW4d{2Njvi$AL5EScK=oK3puA)>VE)uU2ZK$j(-f=5^rw zP9-!yJUu_crgUJtPI=u)bSK|1S|wT~dH1`xD1*$ii2fR!W6GkQS0uX!tt>a6JX2eE za-l9h&^xddPN>+~>ah3PaqT|9@R*Cmuk|lnK*5jw!S*43wp7vw-tn5IucgjAbjB2W zzlp@j%nl6v(yKO3F*c{(I3wuf3A9ZRB|~Jf)Rs`7u5*Xap?T@sE`e%{@l;mc@@alR zfZJAB3tr6IXYZD`o9Ud4=j;3TivlDzQtTE@$ak4BrH*X(hZ-uFSZr5WGI%wGe?&{Y z+I<^0x=cDCY8+wSlbf|uvReC!G3()q{Faq$k^okzuzhf<%Idg!ofF-xXm?P)THEYYwy@*mzDDG_ImyO)&G?=< zh4~|O;cu7J-H)xUb@ifS(eC8DdYOOs$$BQ$F}sHkO$TO+TDJGM27S!y21YjAdp_D4 z8IOnl#;S^?My4oKhkxJ#>)SV)44M@(#Tqdgtg2JC!ersRMwl*B>$MO^2dB(VS;=Fk zM+dcoSiM{aiuJ>a7`U1VNCEAHk#T7}cC>*?rJ6cV6l;Yme%a*94$`7o8gn6ke(#n5evxxStv79vbeK7uhf zXN=0bK6au^QB%&NN?|+eiSJ9!{4!~vX>Gl`S7@RF?YegI{PkIci#QCmeXLa#hLf#5 zL_M(0{u?D3%47Ux zr>jWttv{rhjd+owB=z1dzt>jc*>svP z`cLgzaB37zI&nK89Ex-hzQU)WxO2&U5Ug?~gvoCH#Dn0+PwQx-f$T@=85xS!@qFD1vs!!r)=x@M95n0mBIx6sD6|S=t%8sx|9i2WW>evszxm zaBgm%erno{CAWW{x-~H9KgJ4U@a1z7i%J(0p64~{SU-v8u+4{OS_?GvRsU0_%jvWpz1`>KNNyzXBxq zJk`}(uET7|I)r%d*#rjcN{ikTpiy$fIOG^_ydk=dg?y+Ywmh($6FjerXH_$c?9p{& zfpf^;QQs{*rT)D@J-K`T)_A> zg+o(T)>PTeE5R~Regsgv9rIwPFn=#*r4E1T4)wec@A9ak{v#zlE>U3H$l z->G>jsTeVB&X?X1WwmXQ5$?^@a%I&OuqonU zhbhcdxV^wa(6{y9kfg5tzYbVpZce^o;1Rr!{pG(&$+&uTw|tVuglA>5S^a!HSmLBJ`MJ536jR0-hHLT@!CCaU)N@#9Fc5`HgJf$98yD#?`|JGtb^G>JF<|eVzT&3%A(Zy2#yXg{SDu&I; zkDH~Elp_N0D+7qCBN#Mm)tg#7djB5X#Ni<^8PB9q7ZsNGs82X0ndobSXz-O!o))8G zt|=;Pt9~CP61ES{u zd(8!JOD5DBoTy8LN2@jK{OYd~ucn+kbsw#myO}oCC50JeSjB4=6BGHpAD8g>^w1p+ zOG^SLC^f;)2VmYhafdROt3g~Z(r`2 zo4;-u{TGYm0G33U4PO&VwP#`RBEPLwfu;k>`Y*D$LRK`xZ^FQ>12>tNIe?Lxz9 z-6D9vn(fv(5{D~NL~6H+YifQW6Zh{HKWHdpOv6=V%#U+zN$r<@4owL^r5Kx>7~@<` zcwp}-#c0LwNdBWntkKC)P~i;*<)T-95#~iBc3@&QPxL-@lIk2Suc=h7Ka5#ZWQJ z3BI(qVX~w3d?$e}&+nFA?HNSQB1iG?ywP;soU##aeOLpw`*K}&I{0eUPVw$i;NCLb z`Ypx>lijB4q!NGJ=-*yx?`-a#tXw+9>~gyztH;lR7s$$waeAnzolmP z%e@EG4Af0q=(xZUGmW!?W6*%Xj>Wy31pLe>tri!D$n!@89Pd6a24bEGJ zqzhkg;!7j+OCQF~*Q>1*k=3WSe6B(dCaX!IRFL?>J;u$umNz2>v$m(&TEjWDZY>da z!}O-W$jUR}eT+IsGli&B_xqXEFW*O#nG3pxAinfhJ9Dj=7w?aG2_48}P^%h#w+=DY z!xxV$dKoj+p#WxV?dmrCz|$%yG5g$Ty|oXU+olf@(^PU&irymlgp=hPd)zy>v%m{o zEe}0k2P9-b=-fre+M9Cj&Afi4i=&~CH>JfM`sm@=&F7Fm~B0i)7Bsj?1B!=VNdnLw6-1+Pbu3**OSrqz2JBTiwkt9~grISyU#QZiu1$NW znIN~IkzzwT9jjNz);Q`o1p%i;QaSN}%{>|JXDk-mTxrXx?5j?{rz4x@2R*`5i#K(U zf2yC9J#%z&GU)6zG$PviqL7by1Bp{-Wu}%*JIii0pwDAc(s1|lxe;>Xr@AWLOW1oG zxt1x(NVo{LnOzAXVb%*k%n(=L!`||6!w5N-d4r07!nYTYOrtrqzE%m9DV$W5hPq+O zkO)*1GTEEd)EWlArcR;BD)%_ZCK5`gj2urURkys(=ZxZAy6jdM-MR5?KMLOP%~2R4 zcF2Sf4fEqyGn#i&BfR(J!SH*{CVPYJ`Fc_faajKxwQcX@=$KL<%h9eFGLW_KhV1#R zg0K}kn!^So6>j%rfFg+yvuk-5)Ypc|GKhuhf)-Xu5YxnGD%kcl74!qzBqfQ&SHMrh^tcB zb-G(WALuo5vaj9bJ2bGm^&SabtmcDnn69m-i$;X}8)mi?zv@tT=@HQ^WcKKz?yzjl z@^Idh&8d;~0_Bu!1^Vb@h(HD#r%Z(I^!HUqdEa&1jS`EBJ{r9o1hE@KKY9bM8ny4c`ucNahch&m+McMbV8hv&-VeBV z^Swz130dPFSllJ{T*OpW8pD$TWwva}UB^|SOdyU{% zi|fU)D_#K1AQdnFu+P|A+W$22x`5^Kl|cZR(e)O!cB{U{SNyCbsEZGpO}a1)3$33x z>g~MhK2rb65x}ZG@IN-ztmQ5-FPp-`LL0B3D~$$DIclG8QR 
zwbMGbYCMCTKm*Q#-ZjvF_8}Y6$r}2PDFWzri1UXkq@@R~PmLdEMYEVD0ruQo@;>Xb z`%(*uN7v5+Ja!PQtN4UBSw2J!6C1GJJ!)QB;}*z@(|*u6tBeuD+8XUh8$=nk^iK~? zVbBi7VA_CbhddUjj1Lz_5crp>SulTUJoN-YjqJvud3<*OLzm?znx4D9RTL2D^Kdwhw5OL$2|vSt3f*C z?~Ir#3&2nyFVO>rB`tS}UgK>)b6)l|sBWzjiWm%zEnd6_(Lf}SnTbc+9v!MHGM=^3 zO=!+16QBi4gU_IY;Dl>ul5H+kAD$v`^<4*<4T zalr*Lop#w9g?s@_QyO%7^88~yTJJkQ4nK`{KpI2g^% zCC#A1M+Z07M}57RD_0@d_VfBT`LPmh``8In`iCJhvhJiwHoHU3^YpR@tAG%YYZ zrg!%QuU@o>VZ#qJ+}dXFBY_TE+*CapSE z?Z9UoUm)qn;zVs6QGNBRB1<8{KN|~Aeut-uDh5+V5mV9sr1`cxHviZ*U+rt{bER~= zc{cZvvs2pOSqn=$p~DxKP_uJC+Ma=dE||mX!na*hC9q>oM6z2dLIS_iOF2A8b>VJv z2(hT<_o4FLeMhkK`g_3n-0{=5 z`eEUAIa1xKH^Q~jgz93Lh*{T8=ri?wur z3YQ1Mz6K*D!^^+?=XxWkCE_hHl|xW*FOP$Ouyis|ELZ$mzP8>e9P0R9zlT#j5Ozno z)3u#k!H9z8{tnfHq?Q2@BG1)4Li|S9o@jY)9AA4{ z_0ib*Q10+OE64)F(?_e$svWN6&VGT_A0BO%4QX1igC(=Nd^t)Mr6UrT{`mE6o*K72 z)T>zw-!1J5N+y#0kBa+$6YmglV7SBUdgo1KuM&qX>q%+Nnw7Hb@b3{@Z;1C0S^9lqG_Yi=W-Cs zoW6~T`ud#oq8nnNjI)igGwkIf5)Ou1v@=fGS+npy_xfx*rA!=6#4 z)nKKhhupMH^84)|L2ASl$=No>msOh28$YBJ4oPR|FBa5nx2uN>0vtyMxIAhuR@RC& z1m`8W^8tSTxvvG=7LpL31M~|~!tOab*Y@QzWVoGU?@FlW-^D1o^^GEfkL zEO0HRzCUy8tYaUYk)dz!BXY!-FFf(Hg1GOVRT;`7c%d?rk%YrcqwF5-O34jT?0E#z(tIwf4q9{%w^X>Q_g?UAVRZ9dLD)4D*KGt5H5G@hKQ?XHN9 zt9EAs>u46WdN-b{ICzrI4hD+AfkfgA758|~`=jfGOGLFC{CT5%8Amun#yk79gXZi| zVB~{~4Wx(~XABrI19s}vteBiW;V>p+wkvx#6JP}Mz-FDqRc4;GRd5YK>2gyIH~(QI z38x|Yxb(81rV{6k1+=<@BH(sem~t@vTz8fjHoVfTB=-VpTXe@O@q}VE7*cB33a<>j z0|HK1q)R6{={Zm{V(A?w-5m32`Ydk0&LgxBEdDI;e0X=i43&n;)t0~Wq7qll6!Z?v zkr?ia#-M+ixF;Y^1=GR%$|vh}ESdOaqwV=Ii=yg&n{%a@4Da zF=tK4t8}YJ$lW4gX(Tr57?sfw5h;Pa=m$LBq^KAb-(wJNGiD1vMb)x8<)Z^s2p5)t zfRlke;~F0q3G9Tic(tQwoO7TYgbceHz9~JG;n*dnm7hb@r7-DEu3g(Eu4&+Zklz*K zoc(*Df?CuRWv9*6ay>#T^aJ)C97+S|!*&wRu3z`gt!u)P1WI_MbOJ2)tFNigsRo8* zI(UMElb3;KdYTF4reae%&#m9tJ`h&c{7(b`JuTG|nrC+QHLP}7qoN^*Ciz* zh4;k!Ehj_ixMV-A4g^vKpza}`5PbwhLIfD}*^tm**~wL@f2evk*x0MsUbS(;6`|yA zHklhl)SZwCrHYp8Nt3|JV<>=N{uP~^yZ{+zD7NCY?d^TA#uwlahWQ}lE0IQO#DED) z#3v!kW~BQI4(wjX-D@nK$V+kfmaUkP4gfND)7yQ+Vv47#q>L+&wVon8U|+r7jDajb zJ!NQtQ4x4l(@CNfsC|%<88;FW3WY;_!O23o7Zj1inS%gy;DGoffN_RhyAf+{zWcmt#>FHrsl{%*wHuJ#Wl{<=I;rl=}EA%Ix#vxv`BGn%Y95B!Wml4tj zxhEr|Xv)X1Ag@Y{k~7`na;>w9p~M%d<0ULL*<*|2wH5|?jfCf&2jKD4K7A&Y3x(ga z55~B8p%fm-k(D)P7)^w4w}s*tK?_K^Ybppp?1(dep9M;=DYB7VN1Q>UgAD9E>rR-= zEFm3Z+&ja~>uDMcYhAy~vBIMd!%0!=xbPv8o&{kBjk;!%mvh|Sx1qYVo$v>qmC)HA z6`SHbdlN0YBUIwDX9t+00LJnQLK+Z4Mk5c@++FJ;Cj1DMx^T$Ue}1<*H858v7jjLC zy}QQsZDs}`K=zKEAgWx=S66p+D(TZq*w8HxR8S%tJ!aNi(bCOhrP?oSBt6mS#$A0^ zZvJ;^Dd|A5yVo=8tQVZql=1R!u%9}vZ^~zG`XNYZ4!($xyPeDLNqR7k|ADTv6bSXa zqr2&48^>ZBrXwj3h6kC(#*XKtOMM)~0QHXWEk9 z({`{g-9Gjv_&lxRK+R%UrP)}Wq4SBw-=^eHAjW(X2L&bR<`?5#Oi5V}9t+EOm?zHf zX+*5RnS^Bna43mvq00YJ`eW4YHXLO&EwH{6-*@*2of^%<+k@VjYUL|%?{H~VOQ=sM zy|4t_-`PQvMmi6OG90Ci@d_RILkWFyISLHSJUq40Ip6LLBEr*)AQscR(Wc9uk351? 
zDcozcYA3$EdVUVHl7lDz#Hw2(*3#;$r>7F(<|@@TUufdrfo7-r;J?rAZA{$nmX;vn z8rJl~2fd2)pN1o)6OQ$Z>0VB_FU+QmEfiMwD9k2<>(CNa(ezPIJL_?FBS`u~jO1)x8A`K--3@4%|6V1Cm@a16$7ndyIxkU^CKxU|p(3+E zoz^hX*$|pzw6*({vm}@bH%wsVjx6?gNZkH3ondN&)4npF&>lX(WV=GU^Bz0ZYA%(_el{qUzFRaxZhrNZVT;UCXISs7 zR`2I?%^H3SbM4#Q?i!=`bZY@2UV9C`9R|!qZYpsRmZW72@p7DkSp1pRmt*d;- z?p|Y^o_{Mb{F|X+c7=XmjrP0L@}W2SOd=b{A0M-%MRAo~x6f|-Uw8AO=pGo*_c;2| zd;T%h!c}|yvQ;CM#d2&BE1yFda=u)`4gHm)GK!iuIlt&7Z*?& zXO&!Q=7k{!Eg$jqBY>^GnP)`?rO!_e!JDxV3tYd) zBS^xd`c!vWPbbxk-BcxWLgy0JQigs4>)8|2pd%4k#>_|0Xx{MQk{PWl)z!At3_lUU z)-St$ow^iRg{3>abj7}E?cIW2k?u6BnCJT9>)X-F0g~yYWbgp_qWj3Sk>w6Qh^wWM zg*JqNB*7p%Tge1_$LGeGKUA2UD4xntk$hF7f>{8h>ul#lpmuMF5gzUK$~DzW@cPhwEi$+ zaJzmmPuFRaf2ons@Fd{D8jjh!IW}Pl6N&~21B!LIkekLuUM$8}w|Lny)6YuH7+`Rp z_;~5cIP-{Bwv<(W{NI-!9r5dJdselwmN({4!l4^c@9Ftwu05(C)P6>PXh5cH)tsz3 zjnuD>uVN71X)j?$vF0}nW2d4X+7^BDc|DwtI*#TKX83!wTv@Ie&tB+}omN11R&f^} zZ>ygLEOxUmqCqQiSqTIqRwVnHU$}R|YEI`W9^07(eM4I5INtRkvU*w=#r*G^G?mwk zDC{?O=v**QvO9^=;cKkQ)i|P-`pR514#f0ORNQype)i51&tt~ zR1p!7Ad2FsEO`jjQUpUFh@(i@G!hXAi>54zCJ}=y#z60jj{VhX?bzu|@85Tq?|k>% z`%Wh3-8&9NMj`|KiZ9r<*KizVY~|HyJwBSuw;8nbP*|51w>5QzZnMZ&S07ou@QBgA zu&&6-JqP8IY}y~Ox^E)&h@w)pI{Kr(bJo`-v>I>}ZFV#X{VLt2^jPTInfz#-;$ynL z_+c~93A5!V;wk&R4@#$gSmh|s_|OsdAh5)=KgMShM)F7nb)?o=hplem7BP+*)tNR< z5k}6{0Xd>>W!Q066zpgN${lJ>&8b5a@K8taL?_*~$FF=Qxx%^SPEx<;P4 zxxBhjsrv*?+dPPXv6yteNG)wiB634GQ4K!zXJrP|1ZdN19pCu}Civ>hLE!8RNG3GL@@YsXwD9euXE|I36%kz|LS zlZt^kk>f5Ngq}hguU~&VP9je2J6UH?*r+yT{}Ci=jOUz9h#f?iysUUWP|uOpQ0^EB z_!;EnKBVyF^blYNFpl_{F-iUU*4>m<#h${sWog+6Zfx&d%3 z1?Cvl+RP2|4hczH+9K!Xu9HCTY6hVo%n6+_tF3+Yq22SQx)oSg6I;JFfu8@NQz%#9hp4Hcf{!a(kwPN?= z1h!*bs|il{5Cev{%~gs_eTfpfP`m$e{h4KISDr2hc9tPa69+5X8>XG+{)C8O?mSWg zyG_yuu|rRC2WpL#!#)u)E1Vg%c)%p>YegEA^e1SBB_&F6{uNuBdh z6Qe1++)lj)pm<7zXmgxqd1Sq7Y_M2G1c!+;h_wxNkIX!-o|3 zw3@bFhZa`eyW0a+1pP@4Iv`|dGIp*D`MNgLTY+%%{E>ZJ%e(c>3+PvJt{F#u=G>88 z`cJU>sM;}#PLauaQq{u*UP!+W%P)2^`i<`(K*jhO=jH%_M+ z;==h5EAZx3IF$8BpJ5IvOnTN8H`l?N$XiU!R|v>Gd4ny~Nf8z#F|)YwKDszsNJ%4ds}_h$7@GNf72J zTM~cS5LdD&Vn2!Lo5Le57G$bk*ho(G;at0J8+7k9G8_&&U6r~Hdvw$+bxVM~j#aQL zgBRsW%e^j6#mB1Fr^V+kO{f0nmEd;aG;bv z3n$KTp!~V>3OxKf2CaXy+Nel8PY}RD;;Ri4dih~M?R9FE>JN!m zx~29NJO;88N1L6x&00{iZBcG|=kqBRYXOj$>x2csb5}dRSO|nBD4%9H`)qhFS6DXs z(mdQBjTNN?1%rsTWyN>_hp*eIL`%9^;Qbv`X}GxKl1H!t*~1?O!21+rgCv22?~ZHiUV3~iMep8;I3T;-DtQTB__uqj9VTd-iS+sVG@;9%7+s906vQ2+6aek$sMlCnY$xP0%VWd)eMwOA^+x~ia1 z0`51E#?Sy01X(SiEHQV~Dh}MP>yLH>$pA&F?$N9=f`dH_Ud(X$3tQv=vzl~HJ!i~C UYvv>e1Lwfi$!li?X3vS=0StSyk^lez literal 59887 zcmeFZcT`i`w>FFig#&_(CLo|99qGLaiYSC4Nd%=y?;u^eM?j?`y%&*$gc3k{ReA>r zy(%pvH0g%=ZV=Spx%Yne9pn4{{1}W7lD$`%Ypz+IXRaNfuBu2*N=HgSKtTTR!95KE zg0txa1cZ)%69J#t$D9u#Ab3ge@SYsx#mHhcc{tT<{FW?K@EnhP+F$%lX9Lr&7Th}b zPWavpRZETRbA_P3n;bfi^hIo+ncU6gxT~{gWmy>h@~Seop2FKJjV%iwu7{uVl6yOU zMK0i*aKojCtwfv}sw`{5f_#FY!|r~wl(~4=uIE-f8ndy2+@b2g&pU39d+j7)B(@!8 zg@ymV;de>?|9|=aTn5OS9Aa)S@=|%t3`#*2^_xu08**~xPzv$v+o7u`lbr(z} zF)3HOw27xg+GDG%fc64|1mt_k1xBfPZrzH$;bJRIC{s5%y@*(t)Y@CenU>4H2mJB} zpZ(N5HeoWC;aVS`9K%F$M;J|F_+{v#_twg!tjjJfm3E2ss)Mjm6WJeky|^yeB;rQR zt9MNjY|eagn_H`|>23d*)mvhLWMi$kKxXLq zM*gmI=cy@ev& zDr$iIVoXHTc>9&ZhiR&fAsw#tB3{S24tqK}x-l=vwGlfg)I>5yJ`Ae2p~C=%8qSX1 zVWc3XII>f~&lNr@yKi;=%af9>%VfzBW}<&z9 zqD9AVc<-yx$62N32c7Lp`yWeL1Up2!_L?=Cm72gkRtr}#M)wTl4dl-pkLJ!_u&Lqu zzWg`#z-vbuMR#35i|8WbqRf3Nb)3ckhWeJ5k1wZyTQXeyc{h_D&mkJc5)h!Ql~ z%F!wB6;3bGJ#^e##o}GYL=JGbz+X&dP3!zf#AlVFt=y9s?Gk-Pe3nZ2Ba6DX3WU1R zUJ?_qwTA^ZL;0WEllevY)A;}^fAsSb@gnctv&lSM3MhMqHd3iyUGDW;vViM?t#JXv3oM?{Oq1$Vh=%CDRB{LHgY{ zM31?#Lv)e7K0GZt0@1p1s2hYlldRjn^-_HnyxLdVGRedq`Oh;@R=$NJ(pCUm2s#&H 
za}yz|p#_5NG(jFkxBCf1OBqNfb!^;&I?k@|!05QGnRa^p1e*1sDh6Q|wze)z8B4>Z zD6N8<84u06&LngF?&A|gYLH!hh1us^Fw&^*ajPIJ1ZI=iJhUGa9B_+}dC_f--vzx; zBa$ver4sh&LSQQj_T56CsuWqj;R7b2BBoI49N3Hl-A~|;6!I5n@oRJ9HkmKs42L0NJr`dYZRib%}J`G=FnT&YVa-O2&0krc#EOWvh*%||Bg-chVk{5MVRn;>xl*_c0@ z6QZ)~%X&o11c#yBlOD^rH|y|qHtvIQh4Jbzl9DOXr_S9{vXiD?{o~YO`VpL;A>&aN zIK6OjioSeps>d*S?he0RPG^{Wr!7v8l%fqJ+1J;eHlgprxEF2ld$|b2)f(4Pff%## z3g_kNrcefYbDkEVHF|c_z*xqNT3^sD+B*lk136Eop?Zh}c6LVTrkp-gGCCFebv2Lm z=35^d^W8TsUFp$qwH4<|vYWM!%*ZmnWGG5?@4u@?$z|c%mpx20U(pgagO(y!GofpXufH2a zTh2L~bhN5#D8#g`0QS98y`GK!ppBi@vS~x0Sruj0A<)R=#Ql3O6BOPwV$E80y@dSm zWg7ZzT9n3kPe@3pj3}VvRy?n}XAeZehb0P@-TLF*kwVZI{<$3Y8AjERSs+pdk`K(+e?qTzTZp|7K zGK7UkoPCzfY+<$st@0$~CD%q8Wi8nOV$wq`Ld+T-Pcg@!Q7mifSVq1k;eGJ1cwNi|Rc7 zhAfmC-AYbBHi%uGtgRZnMsO18ytJH~iMg#hI8=Zk(k?FU8t$7+2E)XXLqb$BMHZ@q zW%jDuFV9RE38`CISt@yQo1v--2cO|L0K4+3BEjf}>?6WLKVt6(=FLhmDT2Au*dIs){QN{FW+a-4|kE`wmQmGEkv`#!*>2-F>%|6(EN(Q_Ts0d&3)J)J;!x;*OR zM150@6VH%FTh^}ORAN6!)4cEwu05tih9+UNMoe5D+)LSy#mDcyaVMN*=MizzOS|dMeykgT!jal%*T&GFp!5M;QKfN$yo3d)o6m{j>#(M?vi#66G8l!w*2xRyU zx3oPOZG?9X>_YVFRRvn0A#?AWKtsYX!H=ey!7g1%4(|ZKJT4HDp1;U%firT5} zWbJV5H1sUDc1%i}z>DW{XbmJkHPku|ardc7_E)2g;qIoJF=9>kym#1RZcc*jL$D1g z(pbwJ?|mIXjtltTa>ZG^KoYc=jc8xhrC|M#eMV!HB}T|>u3OE{s^M?f@4Br8;i<+L z_yR8byf-j5R`9ugen->Q!KAlqzoi!K6hb?FAz_2dyMT}n&7?4jR?=*6$O|ZVGMv5n zPLWe0=H3j(F*Qivo2Rzw`<{-yf^or%6-0JkV5c*DC=OCSGv_khRH)}jK*0CY|05Ho zJ+|Gs_d|ppwC$BZNwSI0xe&(6-woz zYGPtLQ@iuvEW>#`qAE`5vhs};L|Ic`%`>?Q@ttwkW>(o>8x9&EO2Fm7Qn5~C5E9~c zvF`?VVPbQC^sbWSe;!Rm^3hDuiWL+e!RVK(^N<@# zsr-IT(Ajct4SWZb`8IP%DOIcy5}}lJb3*%9Z~dbb$g}(EV-|O(n`S591ZeGu#H>!A zU)H98$0<>_)6`t5-J`q&e(27q>!>0TN=|R+IL;vi8)#-!Ztfk@8rTLb#m}JpPQRle zrfY)}8mn_<{Mm5M1*j(Gn&gZ8y#jM*DZ_wU)N6OW?``*_pA)qS(*_9yj`goLg@$bH zXliTsl>b(h{v;Lhv%-H{c^u#$5E(4B$7FJyXH~rw!x_vHgU9#5r(#pDHd2~!+jMqX zpiO^#pURWu55Nw6$*^!ll?+&nmkU-*Z#43*;r}|dU|%sCN#a;Jbdkk79<_;*7yJbc z!Z9Qiq@gUK{O?k0f)uz$EVSV(6f`<74kCL6#l+ukHgIzMyRR9&O~~e#g>&R4Tg068ddr zeE}6VGxmG`TdFRO%?QhOCh||4cdP#G$j9a-$>|FXMC1-#W3Kx1 zLbT-6-5tq^%M0(%%*=oJa$zEXWecz^Z5?W^c?H%Ji%tjAvM?^czA}%lqzGNtwLpuJ zqTYtS-YF+gYRZI*O)lDgcPSkf=bE&$$%Xw>qpnSsc4N!F5!r>qKR?H4)=XV!5t_%v ziKDR<%MvKk&r2B*6-z^(eH#QUG^Pv@QF~6q1!FGS5-}AOKW5+;joLV5^>-p%YpHq6 ztt@ft3#v~HEiE2yuL(5YaIUco)NRgrg6W#JLM@@Usei3+zoX^S)6$C}qU;-ybRV2S z$+)Dkq5v&GQV;`gbq`0f#?whITRDp&M94-<(_7bkY4BBx_abhX|%li|X z^)v4Hofe$hvY(&-U*DXjey}rG+a%L;QFbrf2+Go2c*V%y`1xYXWy}mrMRlPqT6H0i ziia&RF?o7-n<7Xyrf;%B8t@v&&Nf~Qn4|8N)yckydWlK#=$ggI9=gpNd2eRxyM=jR zl3j0xeN*$3^MV5<9Zju_+X_X1lPM})q7ScbLXUh1Dju#jaH(zS2YrU0ew)<2{ zUKa8IjMTNS4HZ1Vv5$8Z<<$tLL+m%(xKL$KO;qiH1S=(k+eSIoo$7D1Hy2%1Wxu~f zS2)5QIX5T7!^SImb7ZnJZhdY{1NNsPn*`LH~ug=D+uhbsatnaONS2lH(^u>l*n}!++ zuWn?Ln5b2wAv`MJA{DZDx`(oQ1}C1`9u=s(##<{#aqR~SG)x#%K1J@%a42q@Iw-Lv!H7>HR`&;gPe`m* z@3`vN0-;&SzT}W^3)4kNR(!76u>56K;hX_!J#FK-#0XUu%D-+*IS*X;4aKEZ-W}B2*SD2hC-T>S=o=hvEX) zw~yxNmSc?yxIOQpQX16j*Y=t$#UW8$V4^S?QzD0v zZH{gSH7h`!4%&FL4*p*U%lsnge$wQf0OZD72EA=pgE!?hy6@{)NvA}`A|OMyRZ<6H zuZFqpsd=wO>g~UL_0}>0%)=U$%!E#Tu*>zaX)Lep){<}hfT`v|)|IA-6l7F~mq06(NMY9KsM0bl9NpDN@&R_c*_hr^6*o((MnNT98Ez^rTT5VS4aHL;l z#7uf3X?jcinWig8DvraHiC1_GlMqE#(G+Y?tg=H@J9H)v8}@$5B9qHde6V3CXR&{$ zg_(*$xz(^@Z|%oP4iNa3qfM)7yAP__I9CC-1|fAE920kPcrn5!4w5sAzno}f>+4h4 zOB*5VVTpx-+BZ^ML1d?2ptg|%eZ0^C)qt_kFib(+*EaV%O%zjQ;Qi4H?ke#D2OK&A zq8;l+OKF20F=mw`59R`OElflWW4}Q=#7&si^P{oW8LEe275r{_n22E3QS(g<^mS4- zgS5k_*yS34Gz|%}6L=l9pj4$M#rBaxEr;L+w*x7b1_23ReB=dd=g1h^PlES%X7^J9tw3xgc50f&A=hZfh2bqHshixnIQ1<@;j#+}O2&ydw?0*k zfY?<;g~HxF${5vio26P2fo`6K;)eRA67HLon9*w?qlPP-v#lf;It5rh*$y$9?#vp= 
z<2xBD+c>ys%2U840SHUf^?Fc9pUpa4hc#-A$th{CUsujF!e$Ji)N?3B8-P#RRmT_Q z>0mF!NYK;0r+tZseV9Sl!;A|yK&?O)`z!Y``#~1yc;o^pcMd|WKmc~|!6;sUMdTz! zd_^jlUYBB(NFDDk1g#*U5@JyE3SPtN8`E_ zaR{GBJQxVi>$t~J=e6DPc~#hVuZ8nd4-}2%dA9eLwQrBn;BJ(7?QSKtLj27&JqkK& zhUY^2iodBFWuWab$?xc7lhLqDcSU0mK43H}5*hka(IfvqBW&)wh(KU{hR^_i^6L3iqRIJ?7%g-`K9he27P!yhvkxYd4UfbqM}o`0)zaz0zoAQE$L)(qG8 zV;27MDETaimaz{++=^CLs4D##Go}Gc^S7XUUl>~?_RAn^R5ji3mzyz;p>=+R9tT5S zL5nhvmYk8ks`rX>}g*(P>P9k~ZdJWqZ4Pmz5`*1iU>zds$6Uf?Eyobu=w%TGs}3 zB14VpUk~ZHl7=8Q3z~ZighXOCy)OI&Urg%s-1X*O*UA-14-NM5?H)M9tGnE_^H54) z_-NA=Rp+O{xCmX{>m0NZR0OwLuXl+g2=za5+n&tRX@5X!Ib?&%GEG!*sp;pI$)c?) z$&H?aAgIhJl&DPF65WCmibuDzB|5wrYTv=3`%WpwINj)M5);utP0#^{n}Gb%(op}a zIQ6fXhOT5R6vyfRs z?qd_x9=dm%gf<3TUXdjkg4l)WHT8_Y4iP<6&kQ{*4@Bogu&pbN7QzRHiTt;QZCi4c zt}UhZ)FfcA`jv9!WHc(-Mbiq(yIJ8ojw`lY6S!4b-4Z$8$u}bnZ&^e&20NMrdnETShIIBXcs4|2)&#+*jrGf z$pSf8T)#j;`bsR3?r+OW16zCR@5t&#Vs9&JQiGBna6f8XL#Q>fQm=vC_!VfCwH+Nt zifCo3$`({Xh7v9t<|AX-xz0~rmxGtZyjFueYksCpkiY2oMq!Fq1^D)0s_Cpz}qY)!1-lP>U7BE_O2hpvOh%64gm(P3t^KR+!+qroJyd;S=!P72X9m zvJ>sR*ROD(M2o&8Vf{3YXtvI_!8!vI4=^2t<66UGDIxhbT#V2R^O5*T&ximDD2m+! zml_hx81HJpH8u;{t-1tJvBE625f748=r z_Ey;yGr2pur54n_S?+!JOP9Z_DpVZ71}U)=sjkt@G^Uq0;PBiM*KVnK&7hoozCwCF z>7!$|mTzcuxoj5hA~JFsvG5vk)?B+ZuZ;8MY&(D&(df=RzDjvT*)`bE9!O8anJ)eT zWN7KQzJH5_;3`8BCzW}fRhK4oH|%i-4Qzp@>1J9-mc(7-1vo{l5)V|Wl}w{p38AOh zFUHF2Pp2br&G?=`->Qh-YxoT1CbdAbNYZlDv1cK0J zVzwK-aaR+JT7?NokxFr*-SmD`5-gdUv$54RM~TUmAcppQ11Oa;_qt=Z^bEWKkB#GJ z)~yz@&=49-E^CO|Ns^JYgj6J09_;-F3$NecBPWmO_+z7Ge zhR1T-r)D?JvlZsN&<^0YfJ(b+@9&5M&@PoXqrQ*VtzSUSGx;+RVK$egHSbnU{m22E zQ3v{$He>UT10#{0T@a4h*pc5{~TtTsarAz2-M5ba~oA#{^v7lmm6*! z#sope2NHT}PC zJ$g9qviS9LLu2o3*lfkZ6&e85?A-$;&5xrR{~Ae1*x9Q9$HYB-+FM)Ynb><9AN2jRb03kBMP@>$~YJ^hI z6hhX;@QX&kp@K(hC&i21i?yA5YM%XrE_qQ<`N2)x8_yQ=1IUDtw`8pbC7LfV1ZsM> zU5bPaVapn@zX7ddc;gSk<`m=m348p(-TZe28uOAV2eo06qP#U-@dv5;egwAr-;B-b zeGY>ik9eFD&?N0DtIvvt*_2oP4MKtUVwy)ypWrNk1d$w)$hT8ENA&wiXKz3GRbYuV zC5dG`iMfB0+b36_f-on4|Mf2c1yW5V%SbU)j7`+W;i+bx(^Jiy=U*l(s!?HXNrpcO z69N0Ar`0<;*E&19R`+-DRcB6n{p81)PSW2ybwOToVSxdj*JW>M*~RAU(cFjV3ec@o z^2nJ6xC_MCo{o>#-Wr8(oT}~6*BVvV!-LQEAH*arqtJlt9rr$+k@5I7I||Zi4hag} zFPbO)ztB8g!(6CEVJ#VHyA;X%P|ugwGETq(WA;{09F-!@c}c9IDa5gYm`rRt?qEN; z1jZ`k^G64PqUpy(kKlhHdK4E*Y*d?D&ED4iUBm z%}xTmd-XpKlvMQg07`pHP*&jSP2n`dE0G<{Mt2Zv2

=q^ajdPin@5bCupP{aR#D zmS3E@Xz z!fDKU#PzT=_yI07YxKoXu5Fy8=!zUdx4-~)!6G{M6;g<@-AtA~NNegU1s)w!3M$3DBJgJY&H0mE|fuE?vA z&|QigF{MO*pSl!@cm<%RAG5^XcU(NhI|*1JuRvJ<0zLNm1Cl8z}>1ICNmq%N* z%~qaavT{)SM)nXVdHrd6C7C)+dyF-S1G%iEjmoH$%e=bO8u;GtB&*dv@aAu3B~G10 zApK~c5Wh<%C;vhQ@<2YRcth@3>ft$AlN4rlyl0wI93Gp8A;lW|Dhw4QVmQEtk$yegh*44)Umg+ekP}M&c zm$O~YT~MW`UuR5e;)uC53!k>q57PCPFpNkw>`E?cq-_3=^>6iiTE?e~;P-GTVr`G& zz<$QIMVDWUF#5mYd*aj|E|5T9$?z{bA?>HvmZ`vnT1{cRWp*k_hMZ5h^?%48(?u*; zH>XZvteA*3dkz~U*wE5zvA~HtB*erkmGG2q1T?YFPolul%zZOYnJ1v@UJvrg{_YW` z>IgW8Q+X-^=-boh=&iEGTNy3740ek{=9o+NhQ*Up0|9_ zTPE$jR5TQ9b6%inO2FN(1JNX}EPo&X42&csqG6rizbWX#r>>w2G8EBsdI0!e;b`X6 zj1AJCS^xXg4jYVh%H@^Lu3!U<8_huNZy_bQu}I zRxWemB>V5_ELE-L;1Yg*Dx0jvGqz+rxMJxJ-Dit6(&1ccRzV+IcC4BJh z`VJE@j3~@G%af3cbEa`y{z(VRk@Vcz0kCqiSp+=A z;Rr>5XJ)c2q>Xv&#R2eE>mi+8?Y>L@tn>cPChd9!&!)o4MV}vpm}eI&U6ydssj~Z6 zwSth%?b<^xlotPcz}FP9Mrr5r+ogz#-F8B%TH)67Jcja}oEv&G{WU`y*nv>_AAC6` z<-O6bnC#Xdl6{ThAqcUO@Qbzj){KDjLMcz-2>2uwje9cq9z83DJt~ ze#7KFZ>x{$A7t$tDYmK}TvVn|azo2ljeJY#i~X2-umP{;?3~isM8~2>L8^~YdP~B4ta#nXrQ!vtdMY8 zWBNz*V5aZ!B`lE&qb-ehdZu?q{M?tzF>QwO4Bx#j?|k*Wn1o67nW#LZsr_*dV;J@h z;VAc77ok@#vqCDI4ib(ofrjjN5KwUDu|`R%PXM%C^YjIE;uG3#;|BTrW7^I&ccOcct?`S`en{LUTkg)DCFI;tXP5rRbC(-VZ8S@f10G!y&1Ny1YxTr8hy_% z$#t1ZTt2896p@_PiyRQSd;LOEkt8fmiMl2(LTUCe#*8>)Ee;o``aS?HyKZx!`yS9T zv&y=stYsWpX%h>0@}otd9Jf9`YJLipaaY+y^==78fAJol++TLZ%&P6zgC=j$#*Kvb ztS{9V3fR|${sVJ)t%5INf(PDOI;M6+RhP0IGIy;VTfA@2b}zRBh&GL5-g%Y%kLyUE zXr|d7yN2P>$BrJeI#f;)%}eXLr6v*=f&E0M!e08$0VmOKUr#>qRg+cjn3;qKUtKC& z5`kOw-a<*#FYyXG!j7zIOPGCEL0^Bg)Nw{SOW7VV z+0_jXVV-EQB(}=IUhlv#VAx>W=H_|nL6a&(?`>_>5IM72-Uh*LscA-3KBlc@O5!z1 zv=Bwxb53V+s1KuE{vw;O`BI!jzj(O?xHZiA$p7}oGSNz#+17{`GO9;YqFZyd(XcoZ z(wN;7ck${*?2`0*5&D18A%9l=-DRxhk8gERBY%LiQr*i-=GcW?Lu)UWJ)~ z?a+IN{BH(|U2o{Q>WyCI-4cLT?=AV-uG1?wrA6Aq21~QI7WJM8knV3QK&y%4 z)T2%1-Jg7qw30KVN9~>BAeSC(=ar<-8kDY|@eYGyd!=Vh4WibZwEF;GS@<&nF zD^j6!p|sf6G)r=5!4J5M$$~6ci9*RM%{uZXdrUsU*Iqb%QR$}9+pL?Mi4P4E?{5YA zyQo4fBCXKYm7HvgS~CDhO@7BiR6_)Ay*DBr0CLdfeXl@fp-Tt)gsgD zO8R2~py@nkRz-(wBcxwN4s*Z9aVapL&nRE(V5_vDKkuby4dUiaai+T*?R#Anmn zN@NdK!>ZLU2toPZ>O2Pb;qd^W>%TU6xIbCl()cNQxE#Q)gZHfW9lH+Q%P#+!%?ZS- z3pfhB9k|0aNL8j}A%6}^Zjy~#y{I~EDsNFNw0@P`?>yFEw%ec6xNe7)Hlw~Ps6$~i zpVP@lFkZbp0!2b)S~EnXA`U?2S|ebhdP69SVb89>{2oVx-$Z3_}Owv1iq)noP z-LR?Y9B(bfpWW~I>UvmeB8cVy%n>=$+PBjB4E3EFKtY>Qbc;R9nJT*+PCFkWV8YXT3;mr6JErY5s9l z+YjQ(#ym1jKs(U3uVA7q)P3V7<4|ZfS@(07{OE^$flW?VpukQR)1$ z4<32dVv?ZU(B| zd#kJsh)PlpooKy&BTGMA8p><_#DkpE(PmCJn8XD29HT!DnhUq#@rySR2(o5wje9PU zVz6IXRAHdrW%Kc#IO)Q97I58Rg1umR6rnJ)XxJl)?R-X%0eQ6}zrKU`;F4!{8^^rX zi1epLqoeZc9Y(rHqvNvsBT3yk3y{oxbk|0QngS1H4hhCY2#q~k#2z(noMy&!9E$EqHbV%*hK+R%Mw<-PUCYGhF4568*J&vp@V9404rh+VjaiL0 z%|u)_3`JENrgkB$oAr0GKEdOOZ9e*OPcpA|mK2A%q~31Cq0GNbx-h%YDl|Ev`^470 zzl##`&gf{SEE?rr1Ms?CC!V1uqz$7XuuXp+EI3vIR>-I8Vl;bTQ=#vroz$|xoJtHX z9@PJxeZBX(EZT}%lI3=XAL-rQE08ZNXtvm-wz7N=`D%dq6qn}B;%pYVS7_k=X1H@| zUHZU#nu}hJ6;d+hr()*0*{}IgM&I;t4T`o1<8S)-H~x^y&g z1Nm9NNAG_7v+QMOSL?!l-Fw#4;?m=K7cZXBIaAD!gMH756WsfXfPk?KiHc&|rkj9! 
z$GXFfj;pW7JGHD)xI0wXc0hC2b_BCv|9Gcl# zJH1(7yalsIBH#zXXATwl1VZusZtoWCAu=7}w~BJ}$EZE>cEd8e#HB@Y&XnxekV)t- zZm0U-t^=80ou5)QvC!X3zv_a;Q~Q<}z;FpRPcLF-wx9Dg;ZGjR|Lm2#e6a63{lNZL zmCb9Wv{SH*^cCrKApa{Nadv2@hFdKTIr{dDJjGb!R=POANnK6%;f^Chn8j$#EjsSh z6wz)5Ek&6-p^uJha=;*gC}Ww;K<3Q~mutGyP2%H(L@xl=w11$l0dw&rO$7j2R!T}c z`QqEUIH+k7gUp!t{wG4!g|6f-MCpV?7y#KO0f?w8G0MG)sOXXEjX)sP0t$Y>pajD~ zVRxqyEjpnVaY4J3U3WSAq><`T3;lvRY=YFVU|?Z|mzL5N#xznh)EBXJ6U zgg+}1NNOqmED0brCW-3DmBJS*`^gd~(af%*xf%g1%9H-k)u z3pXIepM8T;T&Ur3s*A&sc}+l~Bpf*v{x4L-G$EXm*&+ERWm7zU9X(3NU&r7{BVodE z;a7byx?cuibDy7__X2-BXI5yuU{D})+kXHXEq*vU!6pU}vLb!x9|X~`sR?R8k+F3* z4nRdt3xto01O9-Hfb1<=+16InHkc=-zz8q(*D1gWpsQJntbME19@SKZfw{eWNqU91 zq{URoYu3e6j=;O{RB)wBIeMU|)9B5GMrK)s1}C6$(@9~V<=wk$aQ4$m5#XQ^N%VCJ zr1Pwj>!so0o=csdir@*E9z%StmCf5r?f4f>$2j96hnfFYRv`GlP~^Y?!()sMP|K+0 z%Z`}jFY$lVCFyzCxIZ5mOY&Gr(r9!TcXk7yYffI6MnkW-%~SWCkw7Z{P2a#rpr!NF z9}+Vry7IO+mR&96Wt%)j#4BFXT-=a>O36To}~(y4hsnwA|Y47mY(CQ9zl zD;!LgeP^yVMr7@QlIf0Xd^rp`N_yE;`J+EvLB9Q6$q496uRFlrss&3XTaJ}gc{E0G z|7VHt9q+oSvbhAOYxE+N0~DP1`HN+?v?OAhntov@z##b2uUN__50wj6yAQI&-ZQ&t z-k)=@e*`=q5vX0wg2yIbO)GNNM#zkDA_7EfwXW zUB{t)&PYoBWDY)9=s)e`NWFGbXB4>`2kdLA*ci@N>OQM#5!U0w+dD)=8xBs$o zEVZ8al?*Mz_*9SO8?r22YR$p4SX}1A$eZPl8(9JF2Lqhhz*-{l>*r-vrM(dE#eSEH z0_$Qb&mig0fTa^;m2R;yZ>dz0C$##g@z~6(T5{<2M%n!<%H252#-?MT8XAdL8wc#_ z2;Y}2FjE5%9_&m>M8Vxo9F)KyClh9_2mG>DVW{>5DHbK#G4)#<0ZFC z|G_qebPc!jC~LY0U;U;k%egglw1dFuc>Is%_0U=19K>>wxHQV^TipYcCLkEVo?)?0 zghXOXYBKS=_7xkw5&!OC?4Pw1X3QmMN_OhZjocMyRW(P*EtH*WSjg2?| z7vc<{H2mV#^=6w6_PY@M*~w}^i6nyGBBVVJ1}lWzi_?P8_MXydi=N`yf>V?fc#i8a z#a78j_K{20<>^ zOpr8%(rZf0b>NDVgD&Y`d_VZ>wPRA|_*C{wjEW96kBdPetvas!n-S8@Qo2bb)23k=!unDYhg|i7qlA4L5_FN&6(QCHit2F7XbBX zr|ySc(?9$`KX=z{zUPLW>;ev##H<+leDBY}1XeCrv4lrW!di=L3 z^|zNsF-r?yKl_Rw01cd;74!1%xN6jKP3lEHZhQNE5YO6{E2b+siSUn0GGUpSje^Ga zu51l2{{vkL8Tw=NhN99c=(ULa?jRc8tt~*N@kvLsKW#nJ6bO@3ZU}ns{b*QP&w7+0 zb~LY38dOQe(ea~MOW)uin<=T`;lY0OzRdHl1el|<_$S(NhvP|sp~6i#OeI06b#$fJ zYS4k_sD+a>Gb+OWJdL|)#~yy2;^X9q=Eq6k2=+Ifqbp8mRd$y3xz6>DI2a7pu((~} zyim2D!e{W;fA;Z(Qp%{w+)__tYqux;?u7SXf4?|zMts)u&xWX6>^=a+#0MFdJmlPG#+GB{j>DU?Lvs>gtyA8_EFSKm?No1 zMF5L(Em!X7DG*Be7wDN;8(aLAb{Ph0P#DOaU(bf%OuiQM0ZVE%DLeJ&ry)Ff;%0S4 zw}gmqKfaLnHR|^bfqE`1J(VP#pWV5LKZ>*be#F9iO|MRILQ_R~$LFYMU-*A~HE1J@ zA&X085hA9r$|Ab^yLUv5u4C8BCn-JGbi&_{x7A-#J^$;;73t=i&pO4|XFs3Cd>ees z2y=XT-R}}o9Z`ozBy4P~ZAu^Fh~YsWrPQFGk#AF?zU{tBzv7dFw^dstyt_Q;&AUCD zi(um|dHuuTGSTtFFi68|jA_?rL(gIi5YcZMlc<4la6)h^KOS}03V(S;uW~NbPw<_0dB=dbd z^jHo8o>-9kxgIC3izxe_gI&rJ_ep=(ZrMu|5uEkzMz__20+z-;veB9hm=J4UenJr* zjAW{Zd~qsdBQ{_i^*yr-Ln3Ib1W+m3la4;paK@v7tGf)Ndstb-F4pHr%`5MmOQJg( zxH?cKZ0v1~cey;u6HOvq2rHr*Uden>!Ya*z`iK#nL=oTJvU$+G{p$9O(LB^=zhe=! 
zJy{t{64bxXDn+yMv^=f_@y{jstiPyC<#iuBdLK4R<}}mWw&X8)0-T8S;pk3EI@VGR zpW<=1+bV&nVS@ZET+65p)L7ZrCZ`TX`_gVL?T05RZ3@S!10%X5FW#<4W&5no(O|o| zA{I;tjQkvDhhIe|W_?LNXB21ECEP2L`cFJ4RY!%LR$UX@TV`{9NK@^e1h*YG=i*178UY#-Y!EJ@W6uoGK@zfv}P2Q^>EvC&8I0ehhyBS zYTFYv2%(T-;cfBj?ej!aSaPyb9t|eLWpT(}j)m;KhJIdNR=8YWpVHL_nLk`$XFAYo zvNqmsPBL2sVP25De;}N2+0gns7JwP02gwj!xHd%nJx(Q>FI;KWmWAwtMiKluP_W{v zk(zjz;7gKenF0408Zr92b+5IE;32?D==p$Kywv)WCO!C>C~#9L&F8+M=0(7(i&$3* zt5@(DS_)iaN>yu_CAQqg;(}YfAM$Y~5yxFlgX^1#lY6G}M}5!~pfp?V;3}!sg8IPG z--96jWba)dE!S~;`ft-To)lX~BX;bYLg*D$HOPQuw7qp_bD@=lfS{q%&z+B$0>_+v zQ^~Z)$2jXHr$0$%Gk2GUvsB?73giUx>DF{uHeyakHp@LIQW&EKJWZev<*`;$41aGW zAG-QXel|Phv!IMm&VHVmNq@t5ldb2;4cWIiahFlv9Hp$%b}@z({V4&howeB7 zJs*&2Ue#LL(v}x+eB6$X^FtfH5Lm?3CHfnFN@)LdKh{1QaXR5QR<9C$1~))93uN)3 z+q{?OK7$%}2UC_tJ*A&1F|tDRvzt{=e*g3lA9b56@BPlb1b7MDM?FI594NBv(M1Pt zmXOlkQ^^TuykGz0tQ=JiajyPZrdte{XngHm2|%|EL~Nb2#tj)yQA`2JXSAuX%@YFMA4a5-eckFl#AKo-IDOp=*tGo1mUtI^Os# z@YMb!L2X-Rh7@m`Srbky;4dI@JN6gLAKdQ^B|8gMu<(~gTVeMTirgq5Hn%>``3!#G z3f<6(f7m@>EaJAjGz??pB~)hZ{qUE=_xO|_f?luH=G{QPyfnjfW%@}hX8w=gA5Rrp z?a~MIgnOun;{DFbtMUP`GABMLOKdp1Nk>jXYTR(vUtjR@vB5if35-&j4jFdf$9{Jz zh^7%%%4KFb9`*5ye|y25>dl(yM3O(7;tR;gX$XW%%O;3!P5!$XT0S z`;>#mw(g++xXt3BVS?S8b`h-1^*?XHh9)?&wxd5Vt1~?jr+av`&L_iO8*~Mhj$8Sm zVp@ro_>RPpNOe4XK+!d=rZRSc*1$Y&AQ z};m}LsJ4o5_i_}gQ?zl`Z>xS9>97PmwH^&YjlE3{m(A#|#$h2P`V z^k-%lmIhPV*nrDUDrVHQ1ni#W@IF{=k$Qj_qEL*_1o```aO+FbzXeKDx@GB ztc(c7TkA4pD+b$R$v7O2qPml(7=#ld=lEf5rKXees)cDBFIe@N6?NoQfeTJULx*qa zE8|P?oIA4Otsu;#OQlMyN>^V_Qq|ElC&8cbx6~E$4YFu9#_H6hnNOOv4k580=geY{ zg>X6;YoQ59Sh~i&&7r+=!6S3|E3y<}^|DAZ%QDZDa)?Nd9pK$|5HZ+`K9W`}jr1IORX*S=X~6Y4pMTDI;(N=CQC_dF5*MV8ox&V_}ze@L6p8$Xo+S*QHmn0gI2yOJzAaP;-OKXqOY#@C&gZi6r!R&SQbv*MHT z<7;*7F85ZczRyJNj@Mkk8=Vpx9s3FSYk^?dwyS|5YABA=^oR6Itdi0DF}G16hX}S1 z(S8rS$JCeIBl-PMd3}ecEG>1t+g0dwUfxq#Jgx_$XODki)<1nQihGVGMQ=t&#PY>g ztL(Kwz2Litr7N0a{x!?5Q*SGenE{@e2598vz15!W<*l0Ux2x<##9Jd3eVlk%f%MB* zDBXC>{LK0N@63)0bCDo+qY>Y$xFE|c=^&ReMJUtS}{u@^ief|q*|OImF>Lsr)2o2$fV75<}=0+1j@OgY6I9Dd*cYi>BPpg<6EXK z0}r>T!$~74U)M+acaQlrfjiNtfD|T-VQ?~XXevO;P*-6LX!N?2SBJ}#v%`=8%4}Vz zHH5~e`>8)t0~$NKK?+q`xViL32cFDH$?ji+==;m{u~KZ@_Dp zo3(^cMJSx(8(g}Sirc9nnY&#P_bw(kpu=E~DhD1e3TCmQi_{@h6~DBy|K=M{L+a4g zV9&zmT5#hm38ZMkxT{M7NZHpXFOY+);HPk9+$)n0M}`mo>UbpF>v>&;%Mr`fFYKIS~MFyaaf^$^#{w-VSTc4xM&K~8U-QQwc!Aw@J8;m=X zgf24A6~jQ_Q~tLncn~D4ELAQHM60gU*D&)2wz*nYX*77c{$5sz!C@dcqCs>06k4iL zvi~D=6SBKntjIR(?QW+TVKKM@Sw~L8P;Se_RcBX!+l*^z?{Zps{N1CHXm-ZC^6NdV zTgcFE6KyBR6eaF=0Ick`h+EMzJ0W3$nP8d4fAQ*@;;vsFWgdhs4d7o_7UVhZxE?2U zu**ZL{3x!*u-ME%of$j6&zMk3EnamTdlw~_mB93&T*;B70&%AO`{t!jCv(hbFSC## zt8q|l_)1AxT&Vsn;Nnyv>z>PSJM0F-lqUCXz+LxuiVs{bX6(2A^mZ!IA!zN#IxQ(aXDlx=2;%~ZcTmwy|8jaR$lm*xxv9UMK@k( zarwCs=kq#uu!-vmWZw&T7x5tA%Wnfjux`k^dpv~olKbG6P6fK4gsaKu`}2~dj!%*K zfvHPLVd5t(BMVNSo8WR6{3*p)&fs(`?W)y7<@C=yalytu6u{&^u|Pl@&vf@N$Ne3t z1{y_!A!^{Q#sB;&%cFb0HI~+h`O`xqOI|XfMs(=(TQC`xFlT>@NXrW1m$;LbmVY=H74;PO|{%0!6>GObhcB^SPijcY( ziO|HLV*_q^GOr`Cm$;l#H?Vhhkkj@f5x~aygS#wU6(64~T?eKLI~6^SBU-gi#de#e zE`}@b{hu~-)A>K`q^AGA16Y#Xo%}rRi(!}lLaExzI;Lyp8}p+)Cr#Xhab0;?r<-) z1M%D)K}H<%`oq$g<=6OFFR~@x<6q#&?LLw1FpH3%G0GE3o~iE(y7w(FQiS^zh_==U zsWHz`$}gWto~^8@*XxU)eDL^ZF<-PhOy)#0jGCTy!T&Mh@Ff%-x}s(H%Vxb=0Akcu zg71SOFP{jZH!b@}%JH7e#kJnQeQM3!+FlvV4A8Bn{xSmbGin4t{Luq0g2Ur3QFZAk zp@occvtnBPNt^}8WA-fH!dR4>^ZwlL7w?oolL1w)929z ztxsJ&1|mCADbvw!lnWWy0>__~vx|y`yXwy9*Z~Ayfm-B%e^iI+kx$Dk-T}dxfZz)C zx|lwDp`r3GFaZ(3fYjQtZ9^4zbHAq@)b!A=K)l>gpa1yYZyc1-Mn7+&w{wvji6?&1LW2QVzu-T1xjG|_uRl-l%bAJD!Q zWyM5;ss@%t2LeAEMDpV042hNrDT^EGZ{g)(yCL_N8zy%f(3={4a)Hi>QdgTO2&tue 
zN;%U`LVBAvoD<$BoRY&_xR=p%qL)EmLi!N$^7D0+2;Ra1hA=QNwivcZ#D3n>$2OY? zAdR*>Kiy@Hn5huqRcUjLCyg^LD;}L+kmeq3&(0>}5@P^h=)mFUt*1l2S#V8vm2`Rj zGy~nHN#PrKt%eI_%5q5uekp_n4k^ENLaEQZjenI9<@ z*&q$*1ifeBk6KkGTV3$=zp?MNeXu3g;%NK|=n_yl zI#~=1b+00Yxs*+Okeuj;YJuHy2hL`Tf+pesgL|0Or6G~YKH;vOXPpN`OJD-n5y}nf z9FEIBC2(Aij8cfuV*_r#p}UK%W2l=y-!xPind|hBe6U& z)yQ<~PBQrQMcEYE%Ga$&z?;5!d7m)-8~x(mCFPTOQLM!X5bD(M?Q3*+0Nt4G4UI=T z&)J?pAQhWUXQfI?Iy}lk_>7lcEhxEh7(Nl-T?yn!GkyimOtNJX1$L0eO6!aG)+`;~ znvl>FcTbB}`!lORK>O^9x5?*)0kz7ZVxz%CBy3Caw46UYJ45AGz9X{_64Ciq~B zoN|rBNsa8~OEeft>>TfWnSudT4?T8}%_&XTy$5XdP7}!%sOKS&r`DcZ>q5Jb6ZPaV zrpk<1J#UPD3QscwWH$Zc4D)@Xbi$)0K3(V9NV>reZ0B0#vc~EJ_MqCEmL3wjj`u_T zd#Xny90GHeND~sRR#Q0RO=ZVuv+{_zkuy!Q9Uq@8#$~aratdWhDxJL9$sFNCKFcD` zJF)JMX?3%!58{B9JUx43{In)NIUh6I-8gn6@;J6jMx!`eze}IHZ|Ho8S#jdd^WJO` z%O}OTQe>Rj z6?@eIOVeq)ZW&tPl+f6;R@c;ucYw`ilpkt6d5?#!Yfv&XabEboWo#vvIVa`-vCf7K z&)F>^xYg~f?IhzWK&OdUJtTntw>H*p0GU-EG2AMMkgsu|AS)FZ&~B!7PxA({&1F2K zGJE5llA%^&r#~T_aoQoJX%c*s#?sS*TqTWU&3WI3XI|J=>KuDQOEHJ3(EI(`b9hn2 zm&71R`!}A6KO_|yl75&M6>`d!lR}Mn@F~s*`5V-ff|it?wb_1v2JBGLzHQbJ^GAE+ z%?&Pacgd|~M+>6Qe?1!6^q{ngvBf6^}&kJ)K^f#F&GR)K2cv=K8 z?0TYXTi+dwa{bsHCc|6h?Kxr_xS~r0kNW~TKk5}rmSAR#7yCZmnEauMw~-sO)UrEf z#dXz92xCFwM+I-uz**dO66XR(THdI4Kgd$K+@=c>PlgY4!geP}(3inwqIL`LH0d{i z8XkGzOC|M8lOU1Z#o=ar=kWfPC%t9o3LkaURNQX5gTc?r&zIgq5wpxRbr$(E~iX*OBcpQ3%MyH^!|-;m48^TU@OERhQ2(q)DEUKuNE+e!XiXDlNG zU3)TE#PEjWRh6r4F6Lfe^Ik-8o47)sAFwtsz24JzOsPy+J5n|oRD?@(A1N)Vsye!I zi|rw=auN3Ip3V!SY;Qg0+Be`Kd)-SNjM&f-j%)oKtEy=%xgyO+A3!pY+F#SilctD$ zr)JftBo^qwMUixxNv;^1$UiZ|I-XkbT)N#Q6FnB$v2mbPc+Ac)7T-N#%H`C}8LmhA zl!(wN5pr5iUR&+0G{CK3ZgFynYq{t==4wQ?eDczjOGANEUJN6KdmL%p(BXWNXO58{#t^#simg{ihU#_kdnf)4Tx1i73f77p{0yQ?rxTG&yhD&(=FFZuoIJ zy$PuZxZi!mW|L^H$kGHdyIuCtgyjRZwhE#2HFe7 zfo|(VXKoKFy-cgjSk4GWpu0CNgXbuisbk47la!lUO#XyFf%_6t^f*A>WQgV zZ{k{AI<@Gi86zI2Zi9>NF94m7#*%SjB3T+w@NY#8E{@};0k}K3l|ha8X~TggX)f@K zH*8O8AFe57&8M1Vtk`wF7F0eq_*B}G85k~44fz}{Xu5PK;kZizpV9F)H)%xbyp7lQ zzDHT}Q5`_t!H?P8+D)65o=(G!BdTdh7-yLYDCjO)NSe9Z6`3^2bEWN&!aK_QLl3i> z-Zx^K62s#>eH)T>)@dy1f?8d4U8UxQgK5vb4yJEdmgb2JqF9!$#E^f!di~mE`kcrY zpNOvFN>UJFk#Jslee=5?QFqd{2+r7=*KY%;Tyd^9%lgMWY2*^+qgk5r z*|*eL?TlObvfwAWw3?hZsIB2{Cu>y}J;obTUUkTcDO5pT_Kn8PsfL5`=LiEt8N9-| z07+bCBHzY5?3AY~+bA#DQpy?8>M}1@uL!Dmh8YnW&&kaS;tksJ!&Dpc7sAG60)HfK zR}7@T9v0&N>?;L7V_H9N?A(gec2ddBNgkSe4-!Y#a>}OU8F1nr_qRwZJQrX{8xG;Y z^Ngj(@b_a#i&Xt8o8#2!LIf0u9;_XNP^aaMGecePZIhaW9`!nX-gTI(d+%+{)Tl1W zn5IjcW_+0^>y>7MZ$#lH){=-U!-Hq@_BXSijGKuFrpkd@s9ZS3`BRX@Z52+RgEv+c z-BtvA8QZ?7^!K7M&~n4O`rWM&#+vRO&}I`_3rqKFPnTsdm6<{s_snSf>4A6!8Usf$ zB-HQ2OK!_W;4&F6_e-U&hG?-+8R9#(c{~B#-;Z??s>78B4KaaQ=1ku{Z4Py6&TG@y z(basQrbnZpy|ZU^$twEZzP+ww%Nv)>x<*dXGyoUDubP-?QX*LuPF3D4`4X7Z%xTi5 z-(}VN>1%EEAB$Wp7zV$Hfwd_TCz2>-Ox8VCYln zXn*Z@Z^y78>0&YX&l%I^-tmt;${fKE4Yq%P%VZuJSsLi)v)X!XUOq?Ln%3}U@==kU zQjV(-**2I_;`I@G7WR_*0aZfv=Bz4CNSZ2NTqKtI0n&)>d2x z!z<@`SKEZt^b<$l;id3(s?0dhYM2^^8+0x2B(WH`$%vAPcKQU3>|nZ;3%7-v$R|94 zKO>)`iXvBMdq&d6vEfM$4=h|3@HOg7=?*(6Y@bO-WO}76`@n#Ufc-yD6=Y#Eq&<3D zcY3>io>rha!Nsn&p!6bv7yd(}@#=Pr;%_V3v<<}sDKPA-- z+IJKWW_6WMz3)*zvZ7M3#s%HycF&vFZE~k*b(XY3W~8 zN>89C-l?xbz#|>i)fp-~3KHfSxoNaNzB$ZgY{i)p@w$Z*xftyTZ0|ZR7`KSE!gm6d z@Gv8?DKe{Pn!$jtemC*~d+~**iJ?3ykleTZOXuvD_A|ArJS!H>+q$Z1(x!zYliJ?T zhp6OpoEjo(h1aO40$I}i+T=}a&O)?v;&He`yaQy4=n3I?U!&%P=Rn3y7163;Vy;Y3 zD$#A#a;25swkXv}X*sXHE9N)Us;c7*0}y`M)ZbH`NF-lGJz%FdC=5UdJR0Oqk%%aY z8q@I1_O`C^{vtNu_PAl94LN3aw}!EA;4){5KFN>nSflQJ4tS48jpgO%c)>gOh&QDp zM5Utdti16Yrf}o6j@Bkvu08zDS6^J-SKkVAP+K%6M;M8Ju-%1*^buzpkYFy5Gp3a* z#n;M>n|fMk$}=M@ghMbOA&e1kpD@jPjL}t^eKQ;)_g!&3E|a8nm 
z{DRcQc%zD{)P_UMLa~SF5zcc?gW`aJBV;P4YcoftOlblEd{=PLaeR9!HMWec(S$sQ z+E)HH2|{jy;{4Qa2%}{|bDxTiy78@(T*A?0?NJC+ZoW?TUT%cycdt1QlZp16yys9{Ea%r)c?dz&98T!uc$nZvTP64%;R7C+R2$+yXn3P4;#lq8@!6#mGPYd{g$ zhPZ?)S9~muli3(!N?a$Sdn^t|Hak;ZF;JBjG{2=-B^xU6*7fMG;Obtuj}vb8X63ak z-7s*1E}WH9R(+C6pU4atBPw;Do6TG ze@=UwJ({^aB_et%hej6%C){h|jI;S|a%vtR5bXgDC`}DiOIq5rr0pjk{QTO#luQ)W z?JYh!i?ycJsPlu|Nwa?a6bA=K4e&Gowp3JwZ?g-024`MlbqRb*$ad(=WfyJqs*eqL zM*iT!D96hKh<+c@DAkR3^ykgnCtsB;#(ia^r6X0*{07$X+^T4*$z|ttT@Av`oBH^| zlce}7wnq_ETUuHg!1alwzn@<>Mha?y7n-}>9}%J{+4Ff%E=!56lgI$T0u2RM9>84g z8941OQL)Fqm3)ritDp)2bCIlZ7z-6uIsk()4S=TGC(J%QiBDu-_aXw<8f&!lc-=?H z{2~mHtWWDujJ=v<&*iFVPtEgKTahzaWQm`jAK=n{^36)T*ln@vMDlH}n|#i}?h&=i zLFkxW-DbbrD#qbh%mu<^>nJ1Wyla&b!D<*aQ83dUe3v;)FXp9vSs* zbOrftHpqZ@eF<$w^xYbvIv40JPe2_^<+F2>c^vHmRL!0|XImw&D(e;a-guF)aV5Xo z+QXNpMMBicPA5Y5LnkixO}G6!1T#*BlB?bCiDb`6V=03hNz>ydNF7x7;$nEiuWtEo zzmXbdPFu1uG<5GTLCqifCrq2ATomYrclNAL^9&U0{o@j`4MWnrud`EKeX-4 zuCZ?@+)l2{@{TWtHmn`2owV71#l$Z8ci79GsO3Z2R$$BgT-596NHhtu=KO

I@EJ zy(ql43AV{Wd;-M{fCTv2`^)#+wueGU(&YvBfrK^N1aLhOleCmyvR|*8z_@% zU@wdoUzDyeh7=`77$Lxf$>pob4JzjXVncnK(UVMxCzI$QtABC8tPKwb$LcyiY-X;p{0MmYRaLU*SYXi>-)@z>k2RsMGW0if z&LU{_eX*j~yP<%22E>K%`N*cmjq$T>kyRPrv9@1~%5kU^UTS&lR0SB>@TE~`6&TrX zTx_xNy;CR|(&}Frt!hj3fyC((3^jioR7m19!#>&v%v|yeO84IB%>ZnJG$r@^GN%PVU&dKIHFA{0;vN1ed9au+Pl=a5Z z+CUTp>^GLu{ zjW7R0h%;9ylr-o2jK+0pJ>(>lL)kuZZgk{Q-4Z>om@CZ4Ui#2bfHrKBZ%GpUfv z*@RcaXzp@q>5UZEoy7+Vd2E(=n0dMZ-U8enOEuS&H9vXkxGdPH0r<5_;nyiHTdya~ zB)208#C%7pzun=gAvhM`tk%<~&p$fNXNQ;4;RawHrqo_2b_O**S#QuJ;#C8%hSg43=}zmCZdXT)qsOEXH1jaQ;jh) zW}E){^?~a-OL3ZFOa93n%Tn};irl@L0Ob!1Bo|%061pjLF+nXYzv7xWnvJH7f%YiX zh|fIr0>fnl?11aGe$ZrC7N&}GKG+lRD|(U7d5y~?7J9ffy@w#!bh>58p;)_2Lr+hr z%hG!2o6x&%Zr!jH#lFVouB96mQY+nH{7kO$Y3{UUE3A;_o(o%1&CY2r;Imx31g4#5 zvpVUyGLdTWLUK$t^~qw}^rG)dNK3<=#sEiM(skY5bK}!WED4?%bw~#yasU0Mmt*!) ziLbc$7RFnj?en8at-VlsFY>|ASY5&~mjdu3;_n8Ur99%(B`Pd7uJagdn=mY%F>Gm5 z&KZ9C=4&JT7_NcLX#^_GB}@FD&)BIVWOHKq9yEIpAi17~aI%<~XMv-{><43+lNwua znFp7v4aZbZgXqkJO{^OOyYurf^l|Vbjo*s&23EfU@)1sxv;MOs&g-@n+{Y+BT>(6EWA92i(>b zU%zD(CQD0(pQUV?VBuHwU(dBOYTVoH7p7d{48l|^x45-idn}Q2Vl-(FZ?K+-CLiv1 zm~G=<{j{x+BGz)QB}$bL=;1y&0Q=OGxv?YUux)lJY$OQ^9?a$oXg3|Gnb*zr*3}2K zARd#f;ggh31I)sc%kjKy+j?9wDTg(Z_}-6*cc~Mi!(lc&>pe|d3QgM94p9G7m-E&K`_@lJxN7q_D7Ls8&8?ni;;~H2s=^Y z(Q(qHRdN?0d0eK_yq6#uWb+kYk=!?ZOOnmY4qC59(x;9#>#-@W&r3GTFhbXv#6@nF zJxQAT(pu@sc21ts;}C*LEB-9Aph!E$htD4|Z)l32(h(|mHafni?N}1zPW@$6Wf~6? zv?I0jOhH=yZCj-Z^;1cu8%;7nZgZ|x$2dQz`eC3JUZ3h$+vY;kAq8Ho4MDcltwyIQ zUIt{2ck(dcgoxm6aR*(-vkr?|#ar)LhkcAidSOq^WELl6SRQ976RCfMn&J<&+_ULJ zQf1AYkZu^%=6-4{Bee|;k~azPV?7pa=T7gBjjVk{g2VZ&+0-hr3r@YshQfN(O_VZ` z;eAW+F?awvD{PXbDhB2bPO}Q+v7??&zvI{9Kjg}4dlhYqs6Z_~;dsuYsh*>lyVbY> zLW4usDRJ=Y^`luj<5ejkJKgC>mkA%$H!%q_qk8kmZ?mmu z20mzGhxzrJ$bqY+o}5UuXFpkRk(YOg+;I-#nX6Q(!U?YciM+a8gli3yOx)?oZz_sl zNSgv*Ni@D~SYhdM?Ie@{tDMcavCi7c!zEKq@U%xQInz5JqSOv%H#*wy|M4bno-&UTRT|Zuiq$qs zwo12X9oeRS{kdw#ubjOTB1s|qKeV&jgb!Ym=jCmr8^od{aPwB@rVo64WlSqG)al3B zXd_i4@c0|3JjNX#O4CKDavsnUXIgg-ek_ripw$o>Vm0sdW|%vPZ@aTDTuhmtSE@n? zA8EMcj}`OF4Kw4kW=&C4cO2T-9h%CZ#v31xHw8x zK4(s3@m>KZe$A7o-wS~HQ%=|w?ZZY{FP6;G-URPC{bvi0N^QgeD&t>GPfYD!)u@T zKZon;BhmSxJG8yglwRO@JL z6i2+RBfJAH&!#HoR5Qd3Ggt$65T9@cM5>Lyv-uF4(q+L#Ut_KGUPmD()Yh_2`S;u-+8ZUYS<$$`8_LQ`XRa;9sN{NLWDKdPOYw;f+=pY z`dOc`+8D4$6om_P^&2)ZSdxjf_K#j8_2u8lenPW!H0$IPT`oQ%a z8HIsYRHepCl+P7FL;4>De@Ng{m5Ma?%;Z6mUhS(YNSYPWYJigxx}Au48rfNwB}hJN zh%%tCRH(E37+-x*t_jgD+*>%(&x@LytTpLgZgE#vlFO+b)dsS8T_Mo@x9(5p$q#lp zham5Lg3??g%~zBmvaxExhb**%scx;?Jhe}V+pZVy?Oh$GUPAKGruDnEz8&ZB8vjx) zPOy%Fzg$bN(~K=WwFd%(x$vT=eI!@5xNE?5)EhUip?T^=0=xEZ_~Pr9r1={*L+OLt z9OP%VZ@mNH9Hqh&6luoLiBm?`l2?NK@oTMHH5o+wSu?^^pVQbTqDHA>lyKBbiZrZ< zjfa>%YsSPyib<5%O>vySKeo|8SB=4SPeln;&}1-xq6{p782B6vX$!y2pj_JzmV@0i zo*m7jAETg?e73+B+edfwDF8TpsIJa@ZkyRGBVfzU_Oc{&Mg4a5%u5Vv1Ln-jZI$bW zwXyv8_ZRL%W-I!W!W?MtA#nn4@63NbY`q>5$cMa(Ih~2$=-|ackv#M1Y(d2#m7K#Y zbY%4;4XD4!zkqfZE9j3Ht7=!>@W!GRHsN-UNr)yEuLPn(pe!h7@6=BF z8E$po4Pz$Pp}})MqoZUVWRdmx82x)O$*si=Wt0{9!qcJd2WZc%iOZz?MnVHjVFD#r z7T~h3&S|by(WJlJPbsY^((}r8K3(RLnuS}FqUwf7N57Z z4*CQ12azo9rwrRL*o$gEN1TkFn;HErkO9o?YFs9^_!W0X&Lj>f2{ZoW9@`fhaRL&! 
zQ~Wh}3)ts}xPy&44*p91Tg$KM!BnNr%`@Sj?M~|OM*QTmuP~>L$AMa$eD-27R_t{c zQx)G{!qG|2ouEvY=b&R#g2qcr-XV6>49*WILr-YUTsafQ2@1RYq@e9X-JXx>1YQAdpPHh;b{=z z!ZYS;G@g|QZKt5(4h+QBQ7q@hzad-2T5;c&2%l$s37bD@49*WTHJ;c5`JDnL!;jpj z)`YOlyY}5TD(--Jomz?=K~IC^j>xXU`sA92ix9T-?_NhbNh7eDU#bP`FP}_iWoHB7X`Ms!vqX5oo;DS!0)NoW9|T*WLPrn6TV6dE$BlQT zB6IAkjwyK&bh(H7}})W{vGI-U~>G_bZ6pTC*io(jcp8yU4a!s0tfiuEe{lgX0}$`W7qk)uWKW)F#2rm zXNA*W7JqBfq3ztH&X+9XO4~|Lp zqzN8SfR`>$rvqmtx&|5vO=~qketPze@}P@Kxtwe@SJ*PkLk^OJ>U5O)r3U~} z?Ic_Nb!;1n`hn=rWCO=@BSktnwK`1|xp4b>)K)L2y=SlDDsbe)S#Wv3 zhFHi@06Exur|I;I^74s7-8oz*up-SJvNGv9W-U5g%4S;-vB6zU#^3Nlia=`6q zw@{$ThWw8}`URStzkg0LVH0_=N^$i;uHCTx_8I`4P`uin_onFV4`quftfayZ zDT4(XiqmQYR`0edFZ)SVUQZQX|8_iv^VlibeXL7JCCTB@a_)fg+Ufeoua(%*@T2je zPwwGs#>j51q`P$OQRQTMP5gyzD*QG-PCaA5l?@m;j>pl90!`!NVt5EgpJ1;u z0r`Tg%Ez#=-4A_UupD=%wU-+eV8(&*1FLjpTid_l0a%%?ElxZ59_TTk*fg^|Jpv<$ zgc$Ce%@6Adi^CoJhGrsjr2?M-P*Y-!Wz2iiEBXX23T{P9VdZvn#djT8YQz~pmi->dqHRuL<{pW?83 zCLcECRXW0pA8ZgKMO@TH=^JDiV^z^d1F`-H)rM#5VJ8pr$(;+9%b+YmhxP+tPRI?+ zF)_T6+~EHvp8ep76P5OO9tG;7ZB4Dh8`_}PePwoqL(s3{3s3&vUgIF9D?JF~%YeuRG)^wx1G9f#|$H zSW(F(a=DYmSJAg0Q#8D2#;*6{fUxer2?JLyr%P*O=D1_cU?fqRLO}Fj6p(86n?eW} z1_`PyFHu&{=~B;5s?^24CX;Z|=U`Iar`HPxpCTt6@oS;2Me4*8kN=$E0X~6YSOrue zjmvap0F*qgd0V+pz+`X6Nf&Zl$lZz^iC+(G&1+*BUtX+Kng3r11J}Fps+$aCnbUsh zGN>d<5NK(74eEnkjrFR?&PU<`2fUPZp?TAx z?6sCV?j&Om;c>&96Ffkn>%X!<^reNk)SS3ojalis-}7Sd9mT_`=e)bd$!n#bHl*mf z`wk}^E7%Or+UNFC5{h+`ULH&?JD;H%k#xmvVt5Bhm84~SmH&+o_1hJw8K=oyfV$n@ z-oC@lLM_LApFp*k>$WK=^>g?0_XW<{TUN!ztBVH&k3J1$f3h9pf{7kje?*k8#8y=N zH)BB7lszE0UXwrYPFciZfdzDB?F?^QS-_ol6zpAFUIE1-ZDJE|wB{wws!QFwTE#cw zg=H&;+!_=Rv&beDRslZh*uMWmeB0JU>VH?V>xa|*E9(_PQ@;BKFsBjl*XvnT@af~R z-KSUAv5@V1A1VsEb^)t#rJ^qszF#zy-Jli|$X}Ix&I9f46*Sr3bso#yUAV)4+c0!l zITumB@xK_hS1f^0t?cnr&%-IF18SOGEp1Hx;pU;5N^MtijV9EocFM z7+l;3?|g9SX=E!FtJ~#XEw#G=AxwjsKg~88U)BnlJ~-(Wb{2}U*O@ru z&Nq2yX~dd!lw5dzwmuSyxW%}-jd!wL+?zvQwH>>-QC2|;8-I83w-KS|^(DV&(0Fuw zf}aP32?zTgn_XuL%YyXlV}d_M4rAp2AHcdWj{t*HvhiQRT$#1(7n5+paO;z6O9qZ7 z?{EVdd#o9FTsvC=B4G+t=$D3H#b+NJCnP-0RAFiuuKgdX)V`pPZ0b#@EjX$O#M<jbW&F_l@$SmdVg$@g#@bmk4 z`%io$eGf~X0qG6In}1G6|5)L_Nx`p4e`7o#W+#bMJpf&Z9sdIRI~Q$9{vGl{RdVbf z2%?lEE6C6M`*`26i~L9^gz)ZXb3fbKCtuU$G1Jo&%SD~(;R zTw{}GXk(l}H&7fRo2lW|J%2I-Aeq7`Gl*2}Dce4xMndN{`=)08WEK~*0`$TplNaHm zp8)GhgBAs3(8=6ijDEMH{6+PzJb&};|Na5M{U(JU@;0T?n%mU4! 
zFS7uLz@sP;fN=uKgztebG~~kR4<;zrx<^#Lca*QLkA_c4YY?;DE)l&Q-}b7KO9DLi#C$s z7u`QI(LaCwrR~?)zbf+oHug8lKjjb1yuj9W{jK`9*c$(!624N37TTBDGj^|LzVO~m zd%-F=eUWRMTjg)jp>ymW`5fS=`K>M0gdD@!1Rp@bU&+z_)#IPkrlV7999oTtMb6{` zYGCSaTdy!;5FZPeDoejF zeY9bXp@GifCVc(%%dY_ncrqZfjzHYbe;&^N|9aSUESFNz3SM|_T*9LksTy6+koNCGU{Cyc$W{1KuE%x0JL=dlaBHdouH34XGS*q4=eGFN znXTRh^>li`LmYuJMl``cMu~1P{aW-v2>{X!SQO14T_1RK+tXw=o#d)0dXM5hM>i*C zmAzIim&3CZsJkgMd;4@<$>I20y<^i*Uw1E)x~uDKJyHA2`IzA8@4_4e;+wyKVbo%em99m&?S*CMuZ<5lhp{rBg!Kp=h>77yJ(ji}#EVWbX+!93B!xbUQN2 zF?7Z?Ks{V`s)Fy&7egAFyuNStE4o=`Ch9tO$X9r~L;7{d^IiL3%=uKHI#<^;>PVV< zyIfwQNPV$$>HogQ19L%6vtTRBy8Sr_u0|2qt=5~$@kgHr8O8g@vNSp=^~*}k6Xp_X z?KJMLW)p+r!(b-b#F;x{)ulaZBIAx%5i7?&<%yFXcf7VV>=XF!iB$oe^3Snp&znk& zPb=V^13pn^Uz87*eVWeY5UJ6fwhdVB#`ehhTL?EpL`8pjUGbinIJ=QP|DMn?hNuWy z)HCow>XZb9@5wc_m3lncQorb6>)%KX8g1VTd{J`Rx29F8v|GoLkD8z#aKp}DS3L@d-x+tB+d9_N0L8aFe&gCPv9#P=3xR2G@#8596D zM&lUw6am^)NWy_SE9`6D8jeWj_ zO>c+lbu{l70lr|QVBRr|IGuTF7k^)GVn3wtd|FhBY`pVBnep0Lk{~Sz;;MqRp--9S zhZI-e792UarYW1T-(YKK=!&Q->l3XxJkLaR(hkP)nqnQ&D^*vAB^`VN%!ffyGfMbH}v&~F9eOXMz- z4xHMEb>PcQE(Di|FBL;B)ko1+_Tz-}&Yv>MbWqP7f?if4|2oEf${@=)L33 zPIA^Y6wr5H$;3+RYFZg#Q%|X$eMbxgFzwW#dm`ax3;u*%JHF;|9m8FTbyn{@U)(9h zv52>od$-Jzt?~Y|P*;!+m(z%>m#=TX{(d|_K z;Z__Ng1bsbtKsN#W4dhXP+NArumf(VkaaUsfD=c@^zoG1c2bNf?&{v<6r=>&S)!f& z^Sy+|tjk+NT85}+d;U_`qI;mL&#ul~$9%>5pldu@jb_k;iKZvHAtTbsXb1S~vgm6~ zU7J&8=0%^5rYEtS_t(JKy0M;6cFa}(s11CQ;`gfTKN=2UbqbD2b|x~9H0VqGR@Aq5 zN?nQvGD?30HXI`Oy}g-O?UDnE$%}0CNP5uDEn0?+pO)XKw?vSoBelBh2^3@e=VJ%x zr;oU|#`a#Yi_z=&bZ7Dx*x(7R#4E39y^B$e1iZ%`9GOZyLRFdy5rd;dZLd}4hT4ch zT+9#fM5rALhLaq&*m;KmG&KLH0p+>_*a<{788P4^AhrEYRk1P0ozv&bDlJ`@{fyuy z#;4U5`=?2mNo+J4XBzeO&4VwbyoYcOE2V zo+wsQ;u|nyUNj|4#yYCNnYJe^b8^QB32}slh8|*>#?L)^)KATug2%^oUl*Y_b|4`2 zR=WT; zKfP)8KlmBrKM*~Sh#E3`byR55KyNNhYB%s_Wbwt3=(#_C=zoC@K}sEdxr(1K(a$qy z(f(tx`aW&spOHUR<{c0(y^q$x4p#pwKaS94uBhC}HRS2L(vi%e||E;B$5?C)E zP(H8xfvjsI^O)r9o-uDDb@6EAry_nQd6v!cIQ+27dvUnKv|N9S=s z6#gqm`ZqjB=TQK?K&q<@Jlx6^t0Lv(EC~fC}R9ZG?)L z4v5zl)B=iA8TdcEy?0boP4_=)M???-ks=^XiWHIF6)DmK1nHoJVyGg$1XOyL-a$kl zgeJWT7J8N5gh=m1Y6t;xPY`{c_xpW+_pWuLW^+CJZ0?E&R7|o@bd-Y|q9FpTN42KV0#T)7n z=ldMqM7Nt=FGY-u6+2-Y_d^?(8ux~Zy(>~m(F#603X>EK2(fKf8_w-eh;$w=Qv7*3 zhwWp7{+tpz=j1(beK@VK3oZi;KE`D+9Kmni{b!`#$wCsXzoYEwL&+AeqliPE1?LW| z2Qg!C53gOAbqtPTPB+8ims}d2%x0Eo0dMYRPwiszl$_$Ts71kOvLJun>Ys1yB!m0| z!=H_*A2-^i?2}N2L>`%muYksTp~~>A2r&d!tQDv?fDkt8gGwGaLnj(>*hUnOv9bR5 zP#c;#6{xt_J|GHoYp6znWibF#GJzU2T?_S(tAva{J(jK7+3jy;p&OhpZk1#ACokD? 
zRV$4)6W{;oYkgBvx%r7l=JYlbodCfj=@l7hmP&KzYyi8XarxiKrrssO=I;I4t5h@D z2we&Xja?^)8BU8NaC3KV>PsbuJ;F^Fv-!Du5Y}Q#u>kgNi!y#|K+L_4*|VSE#T<6biK*q?*wb6&d+xE+50&5nBYt1y<5hSF0?x}?MxMPs z;SepS!0k*W=GYc1fW7?XTX1(}_8M)N{bSAsX0eM;1V0RKXW$OkFA!tpj`A;U2QhWo zEIX?mxY($Vl!>9HGEzTe_`4upJ-V5toaBbCU=Q!-S_g<{2p(n zakudGT)rETy(F~i;xD7jrBkEX{z@cDm@2zzo#q~~-UG&zAO~itndGjo3)-Q2+hQ5Y zD*hKjbmM%2-Sd?5qy`y_j@r_y_sZxtt(%geTbF>EIwne9a|i8DJ;$tbKju2;J`70%4gDJH`$wK%da`OBu_VcN53L#{h;&<$ej8L`H0MtdZ$b% z*mCwAxm+MK`5nIt{aLZjm!;_`O*g}&pKz@G))y~X0^i!l=$sl zdC0$!rKBR*3ck(WuO4|i8lA8$e9(2noh zS0#;yuHIMUKw8lt*0##Lj#pp6Bpq+3S-ScC`bFcj^PX$JaG=_O%dXit<-FhbEM&Sx$T{zO>h2U+m6YfXuy;Kpyr1A^!edIx zRWfy*O`I+|rpyj>Bl?5ggNWA=y=mEg9hu)q#F8$l2G73H94c~nNMl}c{H#&9s^N0Q zV+LoX^ndnOxe7dSOMCZT5rBhz1Wn=>y{VwF4WQXw}*IVqWN@N*77LQG`n z-oK;Hl9=O%2xZM@F}|U~dX*Zyctb?W%@?ZH`i=D?gCl0y<~~kgHpmTDd-c|Ivveoe9TQC9yY*u$ zK8J8&Os6a!#3${V8vCW~W%B$P{)pY%_+5D)X1b0~|3wH$fdIdx0B>eLOIZI-RghNx zhi3NMsX;fBh$)Etm~}O;-{`CZOJ++&MvVaV=;YH1?YER5YXD0IOyByMdO*6yfnqa| z3?KS_Y3$ZWn@vb;bN|d(j4{=|4bQo35`9bN8n>o5&$9&@oi@w+SAo>Rw@$Mv&qDn_ zrx>E3qoMN2lJQ@Wt&lg6l{T*5pO4g*Uk7^#LJ};GO zdcs(}wHwPsSzNbCN^Q7Iz7=r&aa46*tX~LKB~S`Z#`i5`w{)C6Gch~cFFDgs3Dr_+ z3X3DAFRyk})W53#ws3IzIwNB)!c1{cSUoGO$Efz*?Y<>L-%Hr6$MBs9^GPjICEG9l zw%^aKF`i3F4M~u`!OOP5C(f!aYvw{`l?nxb2=MAp7*FO(^gYvVlbnGe)@R`6=i1h) z*^OAt05Z{E6J5Rw=jD5Oj465Q&cV+?39{FeX_mTOg-DOkv0^YhJ8@OtCs@e-KE`}> z*-d+ZAK1UKAg3#(Axf*){6`tZ14g#gzAQ2#w$3^|W>7=)duZt@h&@vbxg9K%IL8!1 zbl&-)=6r^-f}=&FyRgLhe2zF5`{4ep#Dpj8_L(8Z;MQiAq_Q4 z>=WIl24tUTsx;G@6}nIUTSft%*5pA@v?`;^W6Cx;q7n|W;ALm`)Q}xU7iM>6oQmnE z>M?b%G+w<&hF9HZ2qrWl5qUNe^9=$KPib!RIhx66v$}?HX7)FcR5DYlL9|(@H(8J= z(#|~Nsq0%vVJY9e$UskJ+C}=h;s(b754*-=bSwye^GtWTlfjBOe}(p`VrI zxZ&VLJ%EoM&}s4NzMgf^_P%YQ=csm5UbeY5sgzCVZANd_2v$l~dW*4W1P7$4x#@jV zds8Okb5a>E6Qi_XhhVw^w-joH>@I}Y0vReaF{wH_ce#YPk@i^xp2B}v=)d{^@PtRDOcmHbY! 
z+XO;rg#RBIJKfjtN>`)FRgcuFQzP!wj0(R9V?4MBYjOl4W~bKBAC3Hf#h6b1@cXsX zLhAot>@R#+}mp|ThGGcYP9LaAO`?ZDr_2}A&%HIkR7W{&J{G9N%`atsJC ziBo}zADZvB_jYVF`Gv&WODFC9+nPPmWBy5~10=FOb7q9t3cTkb0UZnQ0l!5CY;U-4{vR#fKL+W)Y}WtHn!g&& zKRg3geKG?NU@>?%j@G-E{mHUCgqm?%{U>s8gW<>KVStw5=rOn*rSs{NA4vpDHcl;pG@xmd5y@wPa29Y9F`+3vHAUM;HY z2_1h!eiY$Yup(*L6)AnZUqb{or#)o1pQ^8MT*?e|Nyl_qO0Dmz`y32R&U*HUE`}YB z+52E8#A$_WmL#xay&HH>%cS*Xq{IGnplhYgnC7jkP3W|j9UK*!K?lF2rZO7${CT}H zwfJjcUVCL3>n5}I)zgq zmX;rW5JF7m~qJ->z*Vk0z1YS4fLs*}d~Qz@dR& zEM70|ka}TN~-R)y{TP`ljfO*;)x9i#5tw(S|_O-R4r&O7*tfMTtexBb1Cw zv-kZe?^AD6H|orbVKM|b^0HxfkiHk;MZ@~-f>^xmkGvKO+P?VJzfO=bNi;IQGy=$5 z!R#hAkh|u*oQ}Ridg%^^?j8%Z^eA-u7pqeuCjkIQ*e3yX2Zj6XPw=t)AkmKw<<6g+@zTt7Vy^DPOP&o_O zdpr6__Jf@KF*|@nf=|(i+eGMmLSSibp5g5Z<<6YpIj@!9KhjP%jf%>&OzG~e-*1zb z8N8}b0Hqoj?*%C2t*rr543sm5?c})(v{%*DtpBnwL)tR_ihNvQq0%@sNO2y zMn`~a(VTI00_HECd@Rm;^#s^2uWmp6o(9ExxrmuFWxmVqd8Ch}x7@y*?o`m${nd#p zv9%?PyC$=BQAc$*CC)EojG!bV>&GZqhWoXjsU(S*^bb$eC+3Sz1$1N+5oGYE7R&H& zJjoMx2yZx3Q*>poKYArnaMV3fVOEm=R5Ee2$c}!q{HJbzMs?fOaTDWQqxTJ2Do4~+ zy%C(5OiPP_x5s17BsKE2iu%dNz$9_6X>wsR*p}tskTB(`)^~3hJaWhueB(#sxpHd7h6){@IMb_WwKRbx zdhQm{T1&DGMCFCrh8!vMJ+*U+Es2%ZJC+@ek7n?4tU^WsLlitM5S7`oHy4T!J zVIe#j&^)vtC(~zF-@-JXJrc!~y*G{1yI0NmU=d|#_LVaCp3%Ijy@!_h(;kTlug#k3 zSMx5CbSepJo-}@O#_BiI&QmTf$h69F7kCx;Yt>RQb}SJrJ5@T-o?FwT-J~;Pi96tW zxFr%gMSP_}FM&p(tjETO;TLou_;mXz`*Q39n_KAcx&t_gEce{ZI=&SW#S zD-At>`$+xdg{rblA5Td}o@_X3cARWPVxYE zVmY57CtiBZ5H$N$uQsST)pR_H=T}%6{gmSH1lamoOh1yASwoxEME*ZaA%KS@vnJbCEjxQH&cTuSI zKi}?O>>p-TZ1rcQ+~HOYlyzyk$aC#+{<*PdChADvu*bYF2NOK9wS1ZU1xjH`Sn8XD zhfI{M8GGEe2~*V1fnENdSDF{$9g8^dD45VM0xnwK@<+V$6*Xu=qZxsFDX z`e?t*K`=DcfW{*{-ZBb5KZ_G#SYlhQV3&Omvs%>b8h+ol8cNtK`q?*U>ETfq&wOdClO(LuakVm4mx)%)iHckhvPS(Jw%^=0LIy5mEao8tgZd__BVW zb>?N^UE)hh$eVikK@-nX4gfSbP0SUvWy}8*uGMX2&g+{2W%W zVbdxlITvBG*bLN!wU2_xa0F(&Oyy|E?3=fdc=gJtj7;2p=TYzP`cJY72m~YMY-MEn zSgAH|F|y4GAPthE+ZWylsM3&`w`LqttbU&HjGfq&Ft+A) zcBqEq%>BY(@*0*qmn6e)cQ> zzc9!AUj9afs$_6QY;(I&t)X#%pMEHS>h#;WE1M%q$6Gp^-{SDi3ki1+g)~z2Ubhw! zm9+ob{r|U z$8#yb^MmFTx9@$WS6GlOSC^7BPhz1sX9=p4Ay*#)Y8V~DW=su+Mp6KvvzeI-8yEHg zcuXNJMIdJgz(Q2S9xy$IFWq&geX@(*_mpCFO(x^ai46#%VU4lVC*$<9p|j>=#aF`O za}(RxHQBwH5tM|ztv~tdf^uhpisXxCa>N}?R+=)|4(A;Lf%JOd>`@w&?3bc6Rm+`i z2A2Y-e0~sATAN7W8qH|M{IVM#9RfT8$D_192v9C)kFGL#>{rW1)FLsXr{0?1;l+Lt z>DvV}d(pMhY60L8V)ZI0H7CnI0>#af+NjQ6LYlm6H|XeW7$-)ciG!R_J6p)1wpTv z9}gCfMk3gLsB*qnvZeT%rFKbnPc{9B)byhOZ=%inhk5U_0IbQxRGU(bGFsE9d8pJ_ zCI_K*m6}e0DtKrFKpTv@sJ-IK7zW@>Y^Z=G{UdrqoBNfCVn!rCyi+9$ISP{d3OG)) zT{GKQQUJ^Wlw3|^KS^J1ihdVx>wJCcGsr4b!&$#8FA0;9=W6gjDN zWQdjIHDrJ&Vo2Th3q`C+6p?KfN*dmpO^~tn98Oh5=FvwkbIw=t;t1%>YGmWpjpAi4 zhe+BQaoXojk_glGNkQ8L~YGyhs%1R2o;$r2Rf)yQ9^o%4%xJWZOTHd#DSYf5lJ-2FzcBiSe_lIJ&rRT2)ap{D15pWG!-90U3RHsd02=6emC)~pn1!4EZp zFFS47&XyoAupSBv#gqGoB`>iDWz_c2`j+T!2iSzPCAVd`#Y(-8MsMT1c#|l8EH9X> zbS%dDU1HN*BVxKU*}xeJ(Z87@;D7%n@r9$PhfSWBFX!DwF+Y#otp=bigSf}j(2PcJ zUz?S@{q&$tTk+dlMVnBl1+AJ}W$5jq*!8B8u!E0usOd5h7aMYz-@%8QA`vDM(~RS< zp|6)?F67_mhWnIXLw+e)^nA`Xr+T5Cm5*|aYUlg`hx5gQM?vxZF_jkFnWmD0<3a~w z8Q;9Mq{j;o#Xw=TX`fevZ{;GaoM=7Cm~E5?cQ&Z2?NFaI^pTEvQ}$|oiF!%#j&o1D zNW8nzfH&EM#%6&(!a zd_I+9_?YzR&TCDE5TfpCb=fRfHE9lvv5@zyKC= z_u~!*K;sz?aJ=tRYHDWVcZ)~yy0L=FT&wSTl$Z0ET@d^*IO1${W~`iW*DhkE)q|{! 
[... GIT binary patch data omitted: base85-encoded binary delta, not human-readable ...]

diff --git a/docs/source/supported/amarel.rst b/docs/source/supported/amarel.rst
index 83127b5f6c..440354953f 100644
--- a/docs/source/supported/amarel.rst
+++ b/docs/source/supported/amarel.rst
@@ -51,7 +51,7 @@ General description
 .. note::
 
    In order to be able to access Amarel cluster, you must be connected to
-   Rutgers Virtual Private Network (VPN) with a valid Rutgers ``netid``.
- 
+   Rutgers Virtual Private Network (VPN) with a valid Rutgers ``netid``.
+
 .. note::
 
@@ -97,18 +97,6 @@ Install RADICAL-Pilot after activating a corresponding virtual environment:
 
    pip install radical.pilot
 
-MongoDB
--------
-
-MongoDB service is **not** provided by Amarel cluster, thus, you have to use
-either your running instance of MongoDB service or contact the RADICAL team by
-opening a `ticket `_.
-
-RADICAL-Pilot will connect to the MongoDB instance using a corresponding URI:
-
-.. code-block:: bash
-
-    export RADICAL_PILOT_DBURL=""
 
 Launching script example
 ========================
@@ -125,7 +113,6 @@ launching command for the application itself.
module load python source ve.rp/bin/activate - export RADICAL_PILOT_DBURL="mongodb://localhost:27017/" export RADICAL_PROFILE=TRUE # for debugging purposes export RADICAL_LOG_LVL=DEBUG diff --git a/docs/source/supported/bridges2.rst b/docs/source/supported/bridges2.rst index 6a617a5e68..70c5b66d3f 100644 --- a/docs/source/supported/bridges2.rst +++ b/docs/source/supported/bridges2.rst @@ -82,18 +82,6 @@ Install RADICAL-Pilot after activating a corresponding virtual environment: # OR in case of conda environment conda install -c conda-forge radical.pilot -MongoDB -------- - -MongoDB service is **not** provided by Bridges2, thus, you have to use either -your running instance of MongoDB service or contact the RADICAL team by opening -a `ticket `_. - -RADICAL-Pilot will connect to the MongoDB instance using a corresponding URI: - -.. code-block:: bash - - export RADICAL_PILOT_DBURL="" Launching script example ======================== @@ -110,7 +98,6 @@ launching command for the application itself. module load python source ve.rp/bin/activate - export RADICAL_PILOT_DBURL="mongodb://localhost:27017/" export RADICAL_PROFILE=TRUE # for debugging purposes export RADICAL_LOG_LVL=DEBUG diff --git a/docs/source/supported/delta.rst b/docs/source/supported/delta.rst index b4da0f0168..8f97277e32 100644 --- a/docs/source/supported/delta.rst +++ b/docs/source/supported/delta.rst @@ -81,18 +81,6 @@ Install RADICAL-Pilot after activating a corresponding virtual environment: Polaris does not provide virtual environments with ``conda``. -MongoDB -------- - -MongoDB service is **not** provided by NCSA, thus, you have to use either your -running instance of MongoDB service or contact the RADICAL team by opening a -`ticket `_. - -RADICAL-Pilot will connect to the MongoDB instance using a corresponding URI: - -.. code-block:: bash - - export RADICAL_PILOT_DBURL="" Launching script example ======================== @@ -109,7 +97,6 @@ launching command for the application itself. module load python source ve.rp/bin/activate - export RADICAL_PILOT_DBURL="mongodb://localhost:27017/" export RADICAL_PROFILE=TRUE # for debugging purposes export RADICAL_LOG_LVL=DEBUG diff --git a/docs/source/supported/frontera.rst b/docs/source/supported/frontera.rst index a48c3011c7..49f7d2a356 100644 --- a/docs/source/supported/frontera.rst +++ b/docs/source/supported/frontera.rst @@ -58,18 +58,6 @@ Install RADICAL-Pilot after activating a corresponding virtual environment: pip install radical.pilot -MongoDB -------- - -MongoDB service is **not** provided by TACC, thus, you have to use either your -running instance of MongoDB service or contact the RADICAL team by opening a -`ticket `_. - -RADICAL-Pilot will connect to the MongoDB instance using a corresponding URI: - -.. code-block:: bash - - export RADICAL_PILOT_DBURL="" Launching script example ======================== @@ -86,7 +74,6 @@ launching command for the application itself. module load python3 source ve.rp/bin/activate - export RADICAL_PILOT_DBURL="mongodb://localhost:27017/" export RADICAL_PROFILE=TRUE # for debugging purposes export RADICAL_LOG_LVL=DEBUG diff --git a/docs/source/supported/frontier.rst b/docs/source/supported/frontier.rst index a4c2dd1c80..1da00ce6dc 100644 --- a/docs/source/supported/frontier.rst +++ b/docs/source/supported/frontier.rst @@ -100,26 +100,11 @@ Install RADICAL-Pilot after activating a corresponding virtual environment: .. code-block:: bash pip install radical.pilot - + .. note:: Frontier does not provide virtual environments with ``conda``. 
-MongoDB -------- - -OLCF provides a MongoDB service via -`Slate `_, -an infrastructure built on Kubernetes and OpenShift. Please ask the RADICAL team for a -corresponding MongoDB URI by opening a -`ticket `_. - -RADICAL-Pilot will connect to the MongoDB instance using the provided URI. - -.. code-block:: bash - - export RADICAL_PILOT_DBURL="" - Launching script example ======================== @@ -135,7 +120,6 @@ launching command for the application itself. module load cray-python source ve.rp/bin/activate - export RADICAL_PILOT_DBURL="mongodb://localhost:27017/" export RADICAL_PROFILE=TRUE # for debugging purposes export RADICAL_LOG_LVL=DEBUG diff --git a/docs/source/supported/perlmutter.rst b/docs/source/supported/perlmutter.rst index 0a1e79d529..1a2290ba12 100644 --- a/docs/source/supported/perlmutter.rst +++ b/docs/source/supported/perlmutter.rst @@ -86,19 +86,6 @@ Install RADICAL-Pilot after activating a corresponding virtual environment: # OR in case of conda environment conda install -c conda-forge radical.pilot -MongoDB -------- - -NERSC provides `database services `_, -including MongoDB. You need to fill out a form to request a database instance - -https://docs.nersc.gov/services/databases/#requesting-a-database. - -RADICAL-Pilot will connect to the MongoDB instance using a corresponding URI: - -.. code-block:: bash - - export RADICAL_PILOT_DBURL="" - Launching script example ======================== @@ -114,7 +101,6 @@ launching command for the application itself. module load python source ve.rp/bin/activate - export RADICAL_PILOT_DBURL="mongodb://localhost:27017/" export RADICAL_PROFILE=TRUE # for debugging purposes export RADICAL_LOG_LVL=DEBUG diff --git a/docs/source/supported/polaris.rst b/docs/source/supported/polaris.rst index 78bfd5aba4..b72be3da51 100644 --- a/docs/source/supported/polaris.rst +++ b/docs/source/supported/polaris.rst @@ -73,91 +73,6 @@ Install RADICAL-Pilot after activating a corresponding virtual environment: # OR in case of conda environment conda install -c conda-forge radical.pilot -MongoDB -------- - -Local installation -^^^^^^^^^^^^^^^^^^ - -If MongoDB was already setup and initialized then just run its instance -(see `Run MongoDB instance <#run-mongodb-instance>`_ subsection). - -.. code-block:: bash - - cd $HOME - wget https://downloads.mongodb.com/linux/mongodb-linux-x86_64-enterprise-suse15-4.4.0.tgz - tar -zxf mongodb-linux-x86_64-enterprise-suse15-4.4.0.tgz - mv mongodb-linux-x86_64-enterprise-suse15-4.4.0 mongo - mkdir -p mongo/data mongo/etc mongo/var/log mongo/var/run - touch mongo/var/log/mongodb.log - -Config setup -^^^^^^^^^^^^ - -Description of the MongoDB setup is provided in this -`user guide `_, -which is the same for all ALCF platforms. - -.. code-block:: bash - - cat > mongo/etc/mongodb.polaris.conf < use rct_db - > db.createUser({user: "rct", pwd: "jdWeRT634k", roles: ["readWrite"]}) - > exit - -RADICAL-Pilot will connect to the MongoDB instance using the following URI. - -.. code-block:: bash - - export RADICAL_PILOT_DBURL="mongodb://rct:jdWeRT634k@`hostname -f`:54937/rct_db" - Launching script example ======================== @@ -175,9 +90,6 @@ environment with ``conda``. eval "$(conda shell.posix hook)" conda activate ve.rp - $HOME/mongo/bin/mongod -f $HOME/mongo/etc/mongodb.polaris.conf - - export RADICAL_PILOT_DBURL="mongodb://rct:jdWeRT634k@`hostname -f`:54937/rct_db" export RADICAL_PROFILE=TRUE # for debugging purposes export RADICAL_LOG_LVL=DEBUG @@ -185,9 +97,6 @@ environment with ``conda``. 
# - run - python - # - post run - - $HOME/mongo/bin/mongod -f $HOME/mongo/etc/mongodb.polaris.conf --shutdown - Execute launching script as ``./rp_launcher.sh`` or run it in the background: .. code-block:: bash diff --git a/docs/source/supported/rivanna.rst b/docs/source/supported/rivanna.rst index ba394856d4..12305e2433 100644 --- a/docs/source/supported/rivanna.rst +++ b/docs/source/supported/rivanna.rst @@ -74,19 +74,6 @@ Install RADICAL-Pilot after activating a corresponding virtual environment: Rivanna does not provide virtual environments with ``conda``. -MongoDB -------- - -MongoDB service is **not** provided by UVA, thus, you have to use either your -running instance of MongoDB service or contact the RADICAL team by opening a -`ticket `_. - -RADICAL-Pilot will connect to the MongoDB instance using a corresponding URL. - -.. code-block:: bash - - export RADICAL_PILOT_DBURL="" - Launching script example ======================== @@ -102,7 +89,6 @@ launching command for the application itself. module load python source ve.rp/bin/activate - export RADICAL_PILOT_DBURL="mongodb://localhost:27017/" export RADICAL_PROFILE=TRUE # for debugging purposes export RADICAL_LOG_LVL=DEBUG diff --git a/docs/source/supported/summit.rst b/docs/source/supported/summit.rst index 53dfe452e1..51e540fc47 100644 --- a/docs/source/supported/summit.rst +++ b/docs/source/supported/summit.rst @@ -37,7 +37,7 @@ General description .. note:: Launch method ``MPIRUN`` is able to see only one hardware-thread per core, - thus make sure that ``SMT`` level is set to ``1`` with a corresponding + thus make sure that ``SMT`` level is set to ``1`` with a corresponding platform ID either with ``export RADICAL_SMT=1`` (before running the application) or follow the steps below: @@ -120,21 +120,6 @@ Install RADICAL-Pilot after activating a corresponding virtual environment: # OR in case of conda environment conda install -c conda-forge radical.pilot -MongoDB -------- - -OLCF provides a MongoDB service via -`Slate `_, -an infrastructure built on Kubernetes and OpenShift. Please ask the RADICAL team for a -corresponding MongoDB URI by opening a -`ticket `_. - -RADICAL-Pilot will connect to the MongoDB instance using the provided URI. - -.. code-block:: bash - - export RADICAL_PILOT_DBURL="" - Launching script example ======================== @@ -151,7 +136,6 @@ launching command for the application itself. eval "$(conda shell.posix hook)" conda activate ve.rp - export RADICAL_PILOT_DBURL="mongodb://localhost:27017/" export RADICAL_PROFILE=TRUE # for debugging purposes export RADICAL_LOG_LVL=DEBUG diff --git a/docs/source/tutorials/configuration.ipynb b/docs/source/tutorials/configuration.ipynb index ca7e43ecc7..40d29782d2 100644 --- a/docs/source/tutorials/configuration.ipynb +++ b/docs/source/tutorials/configuration.ipynb @@ -235,12 +235,6 @@ "## Examples\n", "\n", "

\n", - " \n", - "__Note:__ For the initial setup regarding MongoDB see the tutorial [Getting Started](../getting_started.ipynb).\n", - "\n", - "
\n", - "\n", - "
\n", "\n", "__Note:__ In our examples, we will not show a progression bar while waiting for some operation to complete, e.g., while waiting for a pilot to stop. That is because the progression bar offered by RP's reporter does not work within a notebook. You could use it when executing an RP application as a standalone Python script.\n", "\n", diff --git a/docs/source/tutorials/multiple_pilots.ipynb b/docs/source/tutorials/multiple_pilots.ipynb index c5d6267bc7..ea115d7f40 100644 --- a/docs/source/tutorials/multiple_pilots.ipynb +++ b/docs/source/tutorials/multiple_pilots.ipynb @@ -27,12 +27,6 @@ "\n", "
\n", "\n", - "__Note:__ For the initial setup regarding MongoDB see the tutorial [Getting Started](../getting_started.ipynb).\n", - "\n", - "
\n", - "\n", - "
\n", - "\n", "__Note:__ In our examples, we will not show a progression bar while waiting for some operation to complete, e.g., while waiting for a pilot to stop. That is because the progression bar offered by RP's reporter does not work within a notebook. You could use it when executing an RP application as a standalone Python script.\n", "\n", "
\n" diff --git a/docs/source/tutorials/staging_data.ipynb b/docs/source/tutorials/staging_data.ipynb index f4b70c375b..170da3c051 100644 --- a/docs/source/tutorials/staging_data.ipynb +++ b/docs/source/tutorials/staging_data.ipynb @@ -138,12 +138,6 @@ "## Examples\n", "\n", "
\n", - " \n", - "__Note:__ For setting up MongoDB see the [Getting Started](../getting_started.ipynb) tutorial.\n", - "\n", - "
\n", - "\n", - "
\n", "\n", "__Note:__ In our examples, we will not show a progression bar while waiting for some operation to complete, e.g., while waiting for a pilot to stop. That is because the progression bar offered by RP's reporter does not work well within a notebook. You could use the reporter's progression bar when executing your RP application as a standalone Python script.\n", "\n", diff --git a/docs/source/tutorials/submission.ipynb b/docs/source/tutorials/submission.ipynb index 0e8f408b46..a7385f4051 100644 --- a/docs/source/tutorials/submission.ipynb +++ b/docs/source/tutorials/submission.ipynb @@ -187,7 +187,6 @@ " ```\n", " python3 -m venv /ve/my_rp_ve\n", " . ~/ve/my_rp_ve/bin/activate\n", - " export RADICAL_PILOT_DBURL=mongodb://user:password@ip:port/db_name\n", " python3 my_application.py\n", " ```\n", "\n", @@ -206,7 +205,6 @@ "#SBATCH --mail-type=all # Send email at begin and end of job\n", "#SBATCH -A myproject # Project/Allocation name (req'd if you have more than 1)\n", "\n", - "export RADICAL_PILOT_DBURL=mongodb://user:password@ip:port/db_name\n", "python my_application.py\n", "```\n", "\n", @@ -230,7 +228,6 @@ "ssh username@frontera.tacc.utexas.edu\n", "python3 -m venv /ve/my_rp_ve\n", ". ~/ve/my_rp_ve/bin/activate\n", - "export RADICAL_PILOT_DBURL=mongodb://user:password@ip:port/db_name\n", "python3 my_application.py\n", "```\n", "\n", From 6322b0559eb9472c2b4a6f8c17c95a8ea22d7182 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 26 Jul 2023 11:03:07 +0200 Subject: [PATCH 074/171] cleanup rpc --- src/radical/pilot/messages.py | 178 +++++++++++++++++++++++++++++++++- 1 file changed, 177 insertions(+), 1 deletion(-) diff --git a/src/radical/pilot/messages.py b/src/radical/pilot/messages.py index 5930c1d405..be6401b012 100644 --- a/src/radical/pilot/messages.py +++ b/src/radical/pilot/messages.py @@ -1,6 +1,6 @@ -from typing import Optional, Dict, Any +from typing import Optional, Dict, Tuple, Any import radical.utils as ru @@ -55,5 +55,181 @@ def uid(self, value): ru.Message.register_msg_type('heartbeat', HeartbeatMessage) + +# ------------------------------------------------------------------------------ +# +class RPCRequestMessage(ru.Message): + + # ------------------------------ + class Payload(ru.TypedDict): + _schema = { + 'uid' : str, # uid of message + 'addr': str, # who is expected to act on the request + 'cmd' : str, # rpc command + 'args': dict, # rpc command arguments + } + _defaults = { + 'uid' : None, + 'addr': None, + 'cmd' : None, + 'args': {}, + } + # ------------------------------ + + _schema = { + 'payload': Payload + } + + _defaults = { + 'msg_type': 'rpc_req', + 'payload' : {} + } + + + + # -------------------------------------------------------------------------- + def __init__(self, uid : Optional[str] = None, + addr: Optional[str] = None, + rpc : Optional[str] = None, + args: Optional[Dict[str, Any]] = None): + ''' + support msg construction and usage like this: + + msg = rp.Message(addr='pilot.0000', rpc='stop') + assert msg.addr == 'pilot.0000' + + ''' + + from_dict = dict() + + if addr: from_dict['addr'] = addr + if rpc: from_dict['rpc'] = rpc + if args: from_dict['args'] = args + + super().__init__(from_dict=from_dict) + + + # -------------------------------------------------------------------------- + @property + def addr(self): + return self.payload.addr + + @addr.setter + def addr(self, value): + self.payload.addr = value + + + @property + def rpc(self): + return self.payload.rpc + + @rpc.setter + def rpc(self, value): + self.payload.rpc = value 
+ + + @property + def args(self): + return self.payload.args + + @args.setter + def args(self, value): + self.payload.args = value + + +ru.Message.register_msg_type('rpc_req', RPCRequestMessage) + + +# ------------------------------------------------------------------------------ +# +class RPCResultMessage(ru.Message): + + # ------------------------------ + class Payload(ru.TypedDict): + _schema = { + 'uid': str, # uid of rpc call + 'val': Any, # return value (`None` by default) + 'out': str, # stdout + 'err': str, # stderr + 'exc': str, # raised exception representation + } + _defaults = { + 'uid': None, + 'val': None, + 'out': None, + 'err': None, + 'exc': None, + } + # ------------------------------ + + _schema = { + 'payload': Payload + } + + _defaults = { + 'msg_type': 'rpc_res', + 'payload' : {} + } + + + + # -------------------------------------------------------------------------- + def __init__(self, rpc_req: Optional[RPCRequestMessage] = None, + uid : Optional[str] = None, + val : Optional[Any] = None, + out : Optional[str] = None, + err : Optional[str] = None, + exc : Optional[Tuple[str, str]] = None): + ''' + support rpc response message construction from an rpc request message + (carries over `uid`): + + msg = rp.Message(rpc_req=req_msg, val=42) + + ''' + + from_dict = dict() + + if rpc_req: from_dict['uid'] = rpc_req.uid + + if uid: from_dict['uid'] = uid + if val: from_dict['val'] = uid + if out: from_dict['out'] = uid + if err: from_dict['err'] = uid + if exc: from_dict['exc'] = uid + + super().__init__(from_dict=from_dict) + + + # -------------------------------------------------------------------------- + @property + def addr(self): + return self.payload.addr + + @addr.setter + def addr(self, value): + self.payload.addr = value + + + @property + def rpc(self): + return self.payload.rpc + + @rpc.setter + def rpc(self, value): + self.payload.rpc = value + + + @property + def args(self): + return self.payload.args + + @args.setter + def args(self, value): + self.payload.args = value + + +ru.Message.register_msg_type('rpc_req', RPCRequestMessage) + # ------------------------------------------------------------------------------ From f7195175b3c7999a6ed76073ef67c797c4bd8c0e Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 26 Jul 2023 15:40:39 +0200 Subject: [PATCH 075/171] cleaner RPC handling --- src/radical/pilot/messages.py | 227 ++++---------------------- src/radical/pilot/utils/__init__.py | 1 + src/radical/pilot/utils/rpc_helper.py | 206 +++++++++++++++++++++++ 3 files changed, 241 insertions(+), 193 deletions(-) create mode 100644 src/radical/pilot/utils/rpc_helper.py diff --git a/src/radical/pilot/messages.py b/src/radical/pilot/messages.py index be6401b012..758d3a0daa 100644 --- a/src/radical/pilot/messages.py +++ b/src/radical/pilot/messages.py @@ -9,48 +9,10 @@ # class HeartbeatMessage(ru.Message): - # ------------------------------ - class Payload(ru.TypedDict): - _schema = {'uid': str } - _defaults = {'uid': None } - # ------------------------------ - _schema = { - 'payload': Payload - } - - _defaults = { - 'msg_type': 'heartbeat', - 'payload' : {} - } - - - - # -------------------------------------------------------------------------- - def __init__(self, uid : Optional[str] = None, - from_dict: Optional[Dict[str, Any]] = None): - ''' - support msg construction and usage like this: - - hb_msg = rp.HeartbeatMessage(uid='foo.1') - assert hb_msg.uid == 'foo.1 - - ''' - - if uid: - from_dict = {'payload': {'uid': uid}} - - super().__init__(from_dict=from_dict) 
- - - # -------------------------------------------------------------------------- - @property - def uid(self): - return self.payload.uid - - @uid.setter - def uid(self, value): - self.payload.uid = value + _schema = {'uid' : str } + _defaults = {'_msg_type': 'heartbeat', + 'uid' : None} ru.Message.register_msg_type('heartbeat', HeartbeatMessage) @@ -60,82 +22,18 @@ def uid(self, value): # class RPCRequestMessage(ru.Message): - # ------------------------------ - class Payload(ru.TypedDict): - _schema = { - 'uid' : str, # uid of message - 'addr': str, # who is expected to act on the request - 'cmd' : str, # rpc command - 'args': dict, # rpc command arguments - } - _defaults = { - 'uid' : None, - 'addr': None, - 'cmd' : None, - 'args': {}, - } - # ------------------------------ - - _schema = { - 'payload': Payload - } - + _schema = {'uid' : str, # uid of message + 'addr' : str, # who is expected to act on the request + 'cmd' : str, # rpc command + 'args' : list, # rpc command arguments + 'kwargs' : dict} # rpc command named arguments _defaults = { - 'msg_type': 'rpc_req', - 'payload' : {} - } - - - - # -------------------------------------------------------------------------- - def __init__(self, uid : Optional[str] = None, - addr: Optional[str] = None, - rpc : Optional[str] = None, - args: Optional[Dict[str, Any]] = None): - ''' - support msg construction and usage like this: - - msg = rp.Message(addr='pilot.0000', rpc='stop') - assert msg.addr == 'pilot.0000' - - ''' - - from_dict = dict() - - if addr: from_dict['addr'] = addr - if rpc: from_dict['rpc'] = rpc - if args: from_dict['args'] = args - - super().__init__(from_dict=from_dict) - - - # -------------------------------------------------------------------------- - @property - def addr(self): - return self.payload.addr - - @addr.setter - def addr(self, value): - self.payload.addr = value - - - @property - def rpc(self): - return self.payload.rpc - - @rpc.setter - def rpc(self, value): - self.payload.rpc = value - - - @property - def args(self): - return self.payload.args - - @args.setter - def args(self, value): - self.payload.args = value - + '_msg_type': 'rpc_req', + 'uid' : None, + 'addr' : None, + 'cmd' : None, + 'args' : [], + 'kwargs' : {}} ru.Message.register_msg_type('rpc_req', RPCRequestMessage) @@ -144,92 +42,35 @@ def args(self, value): # class RPCResultMessage(ru.Message): - # ------------------------------ - class Payload(ru.TypedDict): - _schema = { - 'uid': str, # uid of rpc call - 'val': Any, # return value (`None` by default) - 'out': str, # stdout - 'err': str, # stderr - 'exc': str, # raised exception representation - } - _defaults = { - 'uid': None, - 'val': None, - 'out': None, - 'err': None, - 'exc': None, - } - # ------------------------------ - - _schema = { - 'payload': Payload - } - - _defaults = { - 'msg_type': 'rpc_res', - 'payload' : {} - } - - + _schema = {'uid' : str, # uid of rpc call + 'val' : Any, # return value (`None` by default) + 'out' : str, # stdout + 'err' : str, # stderr + 'exc' : str} # raised exception representation + _defaults = {'_msg_type': 'rpc_res', + 'uid' : None, + 'val' : None, + 'out' : None, + 'err' : None, + 'exc' : None} # -------------------------------------------------------------------------- - def __init__(self, rpc_req: Optional[RPCRequestMessage] = None, - uid : Optional[str] = None, - val : Optional[Any] = None, - out : Optional[str] = None, - err : Optional[str] = None, - exc : Optional[Tuple[str, str]] = None): - ''' - support rpc response message construction from an 
rpc request message - (carries over `uid`): + # + def __init__(self, rpc_req=None, from_dict=None, **kwargs): - msg = rp.Message(rpc_req=req_msg, val=42) + # when constfructed from a request message copy the uid - ''' + if rpc_req: + if not from_dict: + from_dict = dict() - from_dict = dict() + from_dict['uid'] = rpc_req['uid'] - if rpc_req: from_dict['uid'] = rpc_req.uid + super().__init__(from_dict, **kwargs) - if uid: from_dict['uid'] = uid - if val: from_dict['val'] = uid - if out: from_dict['out'] = uid - if err: from_dict['err'] = uid - if exc: from_dict['exc'] = uid - super().__init__(from_dict=from_dict) +ru.Message.register_msg_type('rpc_res', RPCResultMessage) - # -------------------------------------------------------------------------- - @property - def addr(self): - return self.payload.addr - - @addr.setter - def addr(self, value): - self.payload.addr = value - - - @property - def rpc(self): - return self.payload.rpc - - @rpc.setter - def rpc(self, value): - self.payload.rpc = value - - - @property - def args(self): - return self.payload.args - - @args.setter - def args(self, value): - self.payload.args = value - - -ru.Message.register_msg_type('rpc_req', RPCRequestMessage) - # ------------------------------------------------------------------------------ diff --git a/src/radical/pilot/utils/__init__.py b/src/radical/pilot/utils/__init__.py index 120969e3a1..b838704efe 100644 --- a/src/radical/pilot/utils/__init__.py +++ b/src/radical/pilot/utils/__init__.py @@ -39,6 +39,7 @@ from .component import * from .component_manager import * from .serializer import * +from .rpc_helper import * # ------------------------------------------------------------------------------ diff --git a/src/radical/pilot/utils/rpc_helper.py b/src/radical/pilot/utils/rpc_helper.py new file mode 100644 index 0000000000..b3ba63531f --- /dev/null +++ b/src/radical/pilot/utils/rpc_helper.py @@ -0,0 +1,206 @@ + +__copyright__ = 'Copyright 2023, The RADICAL-Cybertools Team' +__license__ = 'MIT' + +import io +import sys +import queue + +import threading as mt + +import radical.utils as ru + +from ..constants import CONTROL_PUBSUB +from ..messages import RPCRequestMessage, RPCResultMessage + + +# ------------------------------------------------------------------------------ +# +class RPCHelper(object): + ''' + This class implements a simple synchronous RPC mechanism. It only requires + the addresses of the control pubsub to use. 
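+
+    Requests are published as `RPCRequestMessage` instances on the control
+    pubsub; a background thread collects the matching `RPCResultMessage` and
+    also serves incoming requests for which a handler was registered via
+    `add_handler`, capturing the handler's stdout, stderr and return value.
+
+    A minimal usage sketch (the pubsub addresses, logger and profiler are
+    placeholders, assumed to be provided by the caller):
+
+        rpc = RPCHelper(ctrl_addr_pub, ctrl_addr_sub, log, prof)
+        rpc.add_handler('echo', lambda x: x)
+        res = rpc.request('echo', 42)
+        assert res.val == 42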
+    '''
+
+
+    # --------------------------------------------------------------------------
+    #
+    def __init__(self, ctrl_addr_pub, ctrl_addr_sub, log, prof):
+
+        self._addr_pub = ctrl_addr_pub
+        self._addr_sub = ctrl_addr_sub
+
+        self._log  = log
+        self._prof = prof
+
+        self._active   = None
+        self._queue    = queue.Queue()
+        self._lock     = mt.Lock()
+        self._handlers = dict()
+
+        self._pub = ru.zmq.Publisher(channel=CONTROL_PUBSUB,
+                                     url=self._addr_pub,
+                                     log=self._log,
+                                     prof=self._prof)
+
+        self._thread = mt.Thread(target=self._work)
+        self._thread.daemon = True
+        self._thread.start()
+
+
+    # --------------------------------------------------------------------------
+    #
+    def request(self, cmd, *args, **kwargs):
+
+        rid = ru.generate_id('rpc')
+        req = RPCRequestMessage(uid=rid, cmd=cmd, args=args, kwargs=kwargs)
+
+        self._active = rid
+
+        self._pub.put(CONTROL_PUBSUB, req)
+        self._log.debug_3('sent rpc req %s', req)
+
+        res = self._queue.get()
+
+        assert res.uid == req.uid
+
+        if res.exc:
+            # FIXME: try to deserialize exception type
+            #        this should work at least for standard exceptions
+            raise RuntimeError(str(res.exc))
+
+        return res
+
+
+    # --------------------------------------------------------------------------
+    #
+    def _work(self):
+
+        pub = ru.zmq.Publisher(channel=CONTROL_PUBSUB,
+                               url=self._addr_pub,
+                               log=self._log,
+                               prof=self._prof)
+
+        sub = ru.zmq.Subscriber(channel=CONTROL_PUBSUB,
+                                topic=CONTROL_PUBSUB,
+                                url=self._addr_sub,
+                                log=self._log,
+                                prof=self._prof)
+        sub.subscribe(CONTROL_PUBSUB)
+
+        # give the subscriber a moment to connect before serving messages
+        import time
+        time.sleep(1)
+
+        while True:
+
+            data = sub.get_nowait(100)
+            if not data or data == [None, None]:
+                continue
+
+            msg_topic = data[0]
+            msg_data  = data[1]
+
+            if not isinstance(msg_data, dict):
+                continue
+
+            try:
+                msg = ru.zmq.Message.deserialize(msg_data)
+
+            except Exception:
+                # not a `ru.zmq.Message` type
+                continue
+
+            if isinstance(msg, RPCRequestMessage):
+
+                # handle any RPC requests for which a handler is registered
+                self._log.debug_2('got rpc req: %s', msg)
+
+                with self._lock:
+                    if msg.cmd in self._handlers:
+                        rep = self._handle_request(msg)
+                        pub.put(CONTROL_PUBSUB, rep)
+                    else:
+                        self._log.debug_2('no rpc handler for %s', msg.cmd)
+
+            elif isinstance(msg, RPCResultMessage):
+
+                # collect any RPC response whose uid matches the one we wait for
+                self._log.debug_2('got rpc res %s: %s', self._active, msg.uid)
+                if self._active and msg.uid == self._active:
+                    self._active = None
+                    self._queue.put(msg)
+
+
+    # --------------------------------------------------------------------------
+    #
+    def _handle_request(self, msg):
+
+        bakout = sys.stdout
+        bakerr = sys.stderr
+
+        strout = None
+        strerr = None
+
+        val = None
+        out = None
+        err = None
+        exc = None
+
+        try:
+            self._log.debug_2('rpc handler: %s(%s, %s)',
+                              self._handlers[msg.cmd], msg.args, msg.kwargs)
+
+            sys.stdout = strout = io.StringIO()
+            sys.stderr = strerr = io.StringIO()
+
+            val = self._handlers[msg.cmd](*msg.args, **msg.kwargs)
+            out = strout.getvalue()
+            err = strerr.getvalue()
+
+        except Exception as e:
+            self._log.exception('rpc call failed: %s' % (msg))
+            val = None
+            out = strout.getvalue()
+            err = strerr.getvalue()
+            exc = (repr(e), '\n'.join(ru.get_exception_trace()))
+
+        finally:
+            # restore stdio
+            sys.stdout = bakout
+            sys.stderr = bakerr
+
+        return RPCResultMessage(rpc_req=msg, val=val, out=out, err=err, exc=exc)
+
+
+    # --------------------------------------------------------------------------
+    #
+    def add_handler(self, cmd, handler):
+        '''
+        register a handler for the specified rpc
command type + ''' + + with self._lock: + + if cmd in self._handlers: + raise ValueError('handler for rpc cmd %s already set' % cmd) + + self._handlers[cmd] = handler + + + # -------------------------------------------------------------------------- + # + def del_handler(self, cmd): + ''' + unregister a handler for the specified rpc command type + ''' + + with self._lock: + + if cmd not in self._handlers: + raise ValueError('handler for rpc cmd %s not set' % cmd) + + del self._handlers[cmd] + + +# ------------------------------------------------------------------------------ From 43392ff7d4476a0ada509a5a778fb2f3df023661 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Fri, 28 Jul 2023 19:49:53 +0200 Subject: [PATCH 076/171] import fix --- src/radical/pilot/utils/component.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index fbc312a438..c49dc24a25 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -11,8 +11,10 @@ import threading as mt import radical.utils as ru -from .. import constants as rpc -from .. import states as rps +from .. import constants as rpc +from .. import states as rps + +from .rpc_helper import RPCHelper # ------------------------------------------------------------------------------ From e56aff359366e2874d7e6498a6542864f2136904 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Fri, 28 Jul 2023 20:00:34 +0200 Subject: [PATCH 077/171] adress comments --- bin/radical-pilot-agent-funcs | 271 ----------------------------- bin/radical-pilot-agent_n | 30 +--- bin/radical-pilot-bridge | 1 - setup.py | 1 - src/radical/pilot/agent/agent_0.py | 18 +- 5 files changed, 7 insertions(+), 314 deletions(-) delete mode 100755 bin/radical-pilot-agent-funcs diff --git a/bin/radical-pilot-agent-funcs b/bin/radical-pilot-agent-funcs deleted file mode 100755 index 6f5ac01116..0000000000 --- a/bin/radical-pilot-agent-funcs +++ /dev/null @@ -1,271 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -import time - -import multiprocessing as mp -import threading as mt - -import radical.utils as ru - -# wtf -import queue - -# FIXME: the func executor may need a small bootstrapper - -pwd = sys.argv[1] - - -# ------------------------------------------------------------------------------ -# activate virtenv if needed -ve = None -if len(sys.argv) > 2: - ve = sys.argv[2] - -if ve and ve not in ['', 'None', None]: - - activate = "%s/bin/activate_this.py" % ve - - # execfile(activate, dict(__file__=activate)) - exec(open(activate).read(), dict(__file__=activate)) - - -# ------------------------------------------------------------------------------ -# -class Executor(object): - ''' - This executor is running as an RP task and owns a complete node. On each - core of that node, it spawns a worker process to execute function calls. - Communication to those processes is establshed via two mp.Queue instances, - one for feeding call requests to the worker processes, and one to collect - results from their execution - - Once the workers are prepared, the Executor will listens on an task level - ZMQ channel for incoming call requests, which are then proxied to the - workers as described above. This happens in a separate thread. Another - thread is spawned to inversely collect the results as described above and to - proxy them to an outgoing ZMQ channel. 
The Executor main thread will listen - on a 3rd ZMQ channel for control messages, and specifically for termination - commands. - ''' - - # -------------------------------------------------------------------------- - # - def __init__(self, n_workers=None): - - self._nw = n_workers - self._uid = os.environ['RP_FUNCS_ID'] - self._log = ru.Logger(self._uid, ns='radical.pilot', path=pwd) - self._prof = ru.Profiler(self._uid, ns='radical.pilot', path=pwd) - self._cfg = ru.read_json('%s/%s.cfg' % (pwd, self._uid)) - - self._initialize() - - - # -------------------------------------------------------------------------- - # - def _initialize(self): - ''' - set up processes, threads and communication channels - ''' - - self._prof.prof('init_start', uid=self._uid) - - addr_req = self._cfg.get('req_get') - addr_res = self._cfg.get('res_put') - - self._log.debug('req get addr: %s', addr_req) - self._log.debug('res put addr: %s', addr_res) - - assert addr_req - assert addr_res - - # connect to - # - # - the queue which feeds us tasks - # - the queue were we send completed tasks - # - the command queue (for termination) - # - self._zmq_req = ru.zmq.Getter(channel='funcs_req_queue', url=addr_req) - self._zmq_res = ru.zmq.Putter(channel='funcs_res_queue', url=addr_res) - # self._zmq_ctl = ru.zmq.Getter(channel='CTL', url=addr['CTL_GET']) - - # use mp.Queue instances to proxy tasks to the worker processes - self._mpq_work = mp.Queue() - self._mpq_result = mp.Queue() - - # signal for thread termination - self._term = mt.Event() - - # start threads to feed / drain the workers - self._t_get_work = mt.Thread(target=self._get_work) - self._t_get_results = mt.Thread(target=self._get_results) - - self._t_get_work.daemon = True - self._t_get_results.daemon = True - - self._t_get_work.start() - self._t_get_results.start() - - # start one worker per core - if not self._nw: - self._nw = mp.cpu_count() - - self._log.debug('#workers: %d', self._nw) - - self._workers = list() - for i in range(self._nw): - wid = '%s.%03d' % (self._uid, i) - proc = mp.Process(target=self._work, args=(self._uid, wid)) - proc.daemon = True - proc.start() - self._workers.append(proc) - - self._prof.prof('init_stop', uid=self._uid) - - - # -------------------------------------------------------------------------- - # - def run(self): - ''' - executor main loop: initialize all connections, processes, threads, then - listen on the command channel for things to do (like, terminate). - ''' - - while True: - - # msgs = self._zmq_ctl.get_nowait(100) - msgs = list() - time.sleep(1) - - if not msgs: - continue - - for msg in msgs: - - self._prof.prof('cmd', uid=self._uid, msg=msg['cmd']) - - if msg['cmd'] == 'term': - - # kill worker processes - for worker in self._workers: - worker.terminate() - - sys.exit(0) - - else: - self._log.error('unknown command %s', msg) - - - # -------------------------------------------------------------------------- - # - def _get_work(self): - ''' - thread feeding tasks pulled from the ZMQ work queue to worker processes - ''' - - # FIXME: This drains the qork queue with no regard of load balancing. - # For example, the first tasks may stall this executer - # for a long time, but new tasks are pulled nonetheless, even if - # other executors are not stalling and could execute them timely. - # We should at most fill a cache of limited size. 
- - while not self._term.is_set(): - - tasks = self._zmq_req.get_nowait(timeout=1000) - - if tasks: - - self._log.debug('got %d tasks', len(tasks)) - - # send task individually to load balance workers - for task in tasks: - self._mpq_work.put(task) - - - # -------------------------------------------------------------------------- - # - def _get_results(self): - ''' - thread feeding back results from to workers to the result ZMQ queue - ''' - - while not self._term.is_set(): - - # we always pull *individual* tasks from the result queue - try: - task = self._mpq_result.get(block=True, timeout=0.1) - - except queue.Empty: - continue - - if task: - self._zmq_res.put(task) - - - # -------------------------------------------------------------------------- - # - def _work(self, uid, wid): - ''' - work loop for worker processes: pull a task from the work queue, - run it, push the result onto the result queue - ''' - - self._prof.prof('work_start', comp=wid, uid=uid) - - while True: - - try: - task = self._mpq_work.get(block=True, timeout=0.1) - - except queue.Empty: - continue - - # import pprint - # pprint.pprint(task) - - tid = task['uid'] - descr = task['description'] - exe = descr['executable'] - args = descr.get('arguments', list()) - pres = descr.get('pre_exec', list()) - cmd = '%s(%s)' % (exe, ','.join(args)) - - self._prof.prof('task_get', comp=wid, uid=tid) - # self._log.debug('get %s: %s', tid, cmd) - - try: - for pre in pres: - if pre.split()[0] == 'import': - for mod in pre.split()[1:]: - if mod not in globals(): - globals()[mod] = ru.import_module(mod) - locals() [mod] = globals()[mod] - - task['stdout'] = eval(cmd) - task['stderr'] = None - task['state'] = 'DONE' - - except Exception as e: - task['stdout'] = None - task['stderr'] = str(e) - task['state'] = 'FAILED' - - # self._log.debug('put %s: %s', tid, str(task['res'])) - self._prof.prof('task_put', comp=wid, uid=tid) - - task['wid'] = wid - self._mpq_result.put(task) - - -# ------------------------------------------------------------------------------ -# -if __name__ == '__main__': - - executor = Executor() - executor.run() - - -# ------------------------------------------------------------------------------ - diff --git a/bin/radical-pilot-agent_n b/bin/radical-pilot-agent_n index 0e27cfbad1..e2200eb5ad 100755 --- a/bin/radical-pilot-agent_n +++ b/bin/radical-pilot-agent_n @@ -64,10 +64,7 @@ def wrapped_main(sid, reg_addr, uid, log, prof): reg.close() - if uid == 'agent_0': - agent = run_agent_0(sid, reg_addr, uid, log, prof) - else: - agent = run_agent_n(sid, reg_addr, uid, log, prof) + agent = run_agent_n(sid, reg_addr, uid, log, prof) agent.start() @@ -108,34 +105,15 @@ def wrapped_main(sid, reg_addr, uid, log, prof): time.sleep(1) -# ------------------------------------------------------------------------------ -# -def run_agent_0(sid, reg_addr, uid, log, prof): - - session = rp.Session(uid=sid, cfg=s_cfg, - _role=rp.Session._AGENT_0, _reg_addr=reg_addr) - - # session just started a registry - populate it further with agent related - # config sections - reg = ru.zmq.RegistryClient(url=reg_addr) - - reg['agent.%s.cfg' % uid] = a_cfg - - reg.close() - - agent = rp.Agent_0(a_cfg, session) - - return agent - - # ------------------------------------------------------------------------------ # def run_agent_n(sid, reg_addr, uid, log, prof): reg = ru.zmq.RegistryClient(url=reg_addr) - hb_cfg = ru.TypedDict(reg['heartbeat']) - a_cfg = ru.TypedDict(reg['agent.%s.cfg' % uid]) + hb_cfg = ru.Config(cfg=reg['heartbeat']) + s_cfg = 
ru.Config(cfg=reg['cfg']) + a_cfg = ru.Config(cfg=reg['agent.%s.cfg' % uid]) reg.close() diff --git a/bin/radical-pilot-bridge b/bin/radical-pilot-bridge index 476cfb9347..9b55b6edc0 100755 --- a/bin/radical-pilot-bridge +++ b/bin/radical-pilot-bridge @@ -119,7 +119,6 @@ def wrapped_main(sid, reg_addr, uid, log, prof): if 'pubsub' in uid: d = ru.zmq.test_pubsub(bridge.channel, bridge.addr_pub, bridge.addr_sub) - print('%.1f' % time.time(), d) sys.stdout.flush() sys.stderr.flush() diff --git a/setup.py b/setup.py index 78e1906742..c77a02eb2b 100755 --- a/setup.py +++ b/setup.py @@ -245,7 +245,6 @@ def run(self): 'bin/radical-pilot-agent_0', 'bin/radical-pilot-agent_n', # 'bin/radical-pilot-agent-bridge', - 'bin/radical-pilot-agent-funcs', 'bin/radical-pilot-agent-statepush', 'bin/radical-pilot-bridge', 'bin/radical-pilot-bson2json', diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index aec72eda2a..d2a3ec66d6 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -181,19 +181,6 @@ def _configure_app_comm(self): self._rcfg['task_environment']['RP_%s_IN' % AC] = ac['addr_in'] self._rcfg['task_environment']['RP_%s_OUT' % AC] = ac['addr_out'] - # some of the bridge addresses also need to be exposed to the workload - if app_comm: - if 'task_environment' not in self._cfg: - self._cfg['task_environment'] = dict() - for ac in app_comm: - if ac not in self._reg['bridges']: - raise RuntimeError('missing app_comm %s' % ac) - self._cfg['task_environment']['RP_%s_IN' % ac.upper()] = \ - self._reg['bridges.%s.ac' % ac]['addr_in'] - self._cfg['task_environment']['RP_%s_OUT' % ac.upper()] = \ - self._reg['bridges.%s.addr_out' % ac] - - # -------------------------------------------------------------------------- # @@ -493,7 +480,7 @@ def _start_sub_agents(self): 'ranks' : 1, 'cores_per_rank': self._rm.info.cores_per_node, 'executable' : '/bin/sh', - 'arguments' : [bs_name, sa] + 'arguments' : [bs_name, self._sid, self.cfg.reg_addr, sa] }).as_dict(), 'slots': {'ranks' : [{'node_name': node['node_name'], 'node_id' : node['node_id'], @@ -525,7 +512,8 @@ def _start_sub_agents(self): tmp = '#!/bin/sh\n\n' tmp += '. 
./env/agent.env\n' - tmp += '/bin/sh -l ./bootstrap_2.sh %s\n\n' % sa + tmp += '/bin/sh -l ./bootstrap_2.sh %s %s %s\n\n' \ + % (self._sid, self.cfg.reg_addr, sa) with ru.ru_open(exec_script, 'w') as fout: fout.write(tmp) From 7d6864e4d5a180062e41fcd8ba0135161fbce58d Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 2 Aug 2023 15:55:05 +0200 Subject: [PATCH 078/171] apply resource schema --- .../pilot/configs/resource_access.json | 214 +++++++++--------- src/radical/pilot/configs/resource_anl.json | 60 ++--- src/radical/pilot/configs/resource_debug.json | 52 +++-- src/radical/pilot/configs/resource_llnl.json | 13 +- src/radical/pilot/configs/resource_local.json | 51 +++-- src/radical/pilot/configs/resource_ncar.json | 41 ++-- src/radical/pilot/configs/resource_ncsa.json | 125 +++++----- src/radical/pilot/configs/resource_nersc.json | 49 ++-- src/radical/pilot/configs/resource_ornl.json | 96 ++++---- .../pilot/configs/resource_princeton.json | 52 +++-- .../pilot/configs/resource_rutgers.json | 29 ++- src/radical/pilot/configs/resource_tacc.json | 69 +++--- src/radical/pilot/configs/resource_uva.json | 29 ++- src/radical/pilot/session.py | 26 ++- 14 files changed, 486 insertions(+), 420 deletions(-) diff --git a/src/radical/pilot/configs/resource_access.json b/src/radical/pilot/configs/resource_access.json index 8c0cf9250c..93c0bd6314 100644 --- a/src/radical/pilot/configs/resource_access.json +++ b/src/radical/pilot/configs/resource_access.json @@ -3,16 +3,16 @@ "expanse": { "description" : "(https://www.sdsc.edu/support/user_guides/expanse.html).", "notes" : "Always set the ``project`` attribute in the PilotDescription.", - "schemas" : ["local", "ssh"], - "local" : - { - "job_manager_endpoint" : "slurm://expanse.sdsc.xsede.org", - "filesystem_endpoint" : "file://expanse.sdsc.xsede.org" - }, - "ssh" : - { - "job_manager_endpoint" : "slurm+ssh://expanse.sdsc.xsede.org", - "filesystem_endpoint" : "sftp://expanse.sdsc.xsede.org" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint" : "slurm://expanse.sdsc.xsede.org", + "filesystem_endpoint" : "file://expanse.sdsc.xsede.org" + }, + "ssh" : { + "job_manager_endpoint" : "slurm+ssh://expanse.sdsc.xsede.org", + "filesystem_endpoint" : "sftp://expanse.sdsc.xsede.org" + } }, "default_remote_workdir" : "/expanse/lustre/scratch/$USER/temp_project", "default_queue" : "compute", @@ -41,22 +41,21 @@ "stampede2_ssh": { "description" : "The ACCESS 'Stampede' cluster at TACC (https://docs.tacc.utexas.edu/hpc/stampede2/).", "notes" : "Always set the ``project`` attribute in the PilotDescription or the pilot will fail.", - "schemas" : ["local", "gsissh", "ssh"], "mandatory_args" : ["project"], - "gsissh" : - { - "job_manager_endpoint" : "slurm+gsissh://stampede2.tacc.utexas.edu:2222/", - "filesystem_endpoint" : "gsisftp://stampede2.tacc.utexas.edu:2222/" - }, - "ssh" : - { - "job_manager_endpoint" : "slurm+ssh://stampede2.tacc.utexas.edu/", - "filesystem_endpoint" : "sftp://stampede2.tacc.utexas.edu/" - }, - "local" : - { - "job_manager_endpoint" : "slurm://stampede2.tacc.utexas.edu/", - "filesystem_endpoint" : "file://stampede2.tacc.utexas.edu/" + "default_schema" : "local", + "schemas" : { + "gsissh" : { + "job_manager_endpoint": "slurm+gsissh://stampede2.tacc.utexas.edu:2222/", + "filesystem_endpoint" : "gsisftp://stampede2.tacc.utexas.edu:2222/" + }, + "ssh" : { + "job_manager_endpoint": "slurm+ssh://stampede2.tacc.utexas.edu/", + "filesystem_endpoint" : "sftp://stampede2.tacc.utexas.edu/" + }, + "local" : { + 
"job_manager_endpoint": "slurm://stampede2.tacc.utexas.edu/", + "filesystem_endpoint" : "file://stampede2.tacc.utexas.edu/" + } }, "default_queue" : "normal", "resource_manager" : "SLURM", @@ -89,22 +88,21 @@ "stampede2_mpirun": { "description" : "The ACCESS 'Stampede' cluster at TACC (https://docs.tacc.utexas.edu/hpc/stampede2/).", "notes" : "Always set the ``project`` attribute in the PilotDescription or the pilot will fail.", - "schemas" : ["local", "gsissh", "ssh"], "mandatory_args" : ["project"], - "gsissh" : - { - "job_manager_endpoint" : "slurm+gsissh://stampede2.tacc.utexas.edu:2222/", - "filesystem_endpoint" : "gsisftp://stampede2.tacc.utexas.edu:2222/" - }, - "ssh" : - { - "job_manager_endpoint" : "slurm+ssh://stampede2.tacc.utexas.edu/", - "filesystem_endpoint" : "sftp://stampede2.tacc.utexas.edu/" - }, - "local" : - { - "job_manager_endpoint" : "slurm://stampede2.tacc.utexas.edu/", - "filesystem_endpoint" : "file://stampede2.tacc.utexas.edu/" + "default_schema" : "local", + "schemas" : { + "gsissh" : { + "job_manager_endpoint": "slurm+gsissh://stampede2.tacc.utexas.edu:2222/", + "filesystem_endpoint" : "gsisftp://stampede2.tacc.utexas.edu:2222/" + }, + "ssh" : { + "job_manager_endpoint": "slurm+ssh://stampede2.tacc.utexas.edu/", + "filesystem_endpoint" : "sftp://stampede2.tacc.utexas.edu/" + }, + "local" : { + "job_manager_endpoint": "slurm://stampede2.tacc.utexas.edu/", + "filesystem_endpoint" : "file://stampede2.tacc.utexas.edu/" + } }, "default_queue" : "normal", "resource_manager" : "SLURM", @@ -133,22 +131,21 @@ "stampede2_ibrun_repex": { "description" : "The ACCESS 'Stampede' cluster at TACC (https://docs.tacc.utexas.edu/hpc/stampede2/).", "notes" : "Always set the ``project`` attribute in the PilotDescription or the pilot will fail.", - "schemas" : ["local", "gsissh", "ssh"], "mandatory_args" : ["project"], - "gsissh" : - { - "job_manager_endpoint" : "slurm+gsissh://stampede2.tacc.utexas.edu:2222/", - "filesystem_endpoint" : "gsisftp://stampede2.tacc.utexas.edu:2222/" - }, - "ssh" : - { - "job_manager_endpoint" : "slurm+ssh://stampede2.tacc.utexas.edu/", - "filesystem_endpoint" : "sftp://stampede2.tacc.utexas.edu/" - }, - "local" : - { - "job_manager_endpoint" : "slurm://stampede2.tacc.utexas.edu/", - "filesystem_endpoint" : "file://stampede2.tacc.utexas.edu/" + "default_schema" : "local", + "schemas" : { + "gsissh" : { + "job_manager_endpoint": "slurm+gsissh://stampede2.tacc.utexas.edu:2222/", + "filesystem_endpoint" : "gsisftp://stampede2.tacc.utexas.edu:2222/" + }, + "ssh" : { + "job_manager_endpoint": "slurm+ssh://stampede2.tacc.utexas.edu/", + "filesystem_endpoint" : "sftp://stampede2.tacc.utexas.edu/" + }, + "local" : { + "job_manager_endpoint": "slurm://stampede2.tacc.utexas.edu/", + "filesystem_endpoint" : "file://stampede2.tacc.utexas.edu/" + } }, "cores_per_node" : 68, "default_queue" : "normal", @@ -181,22 +178,21 @@ "stampede2_ibrun": { "description" : "The ACCESS 'Stampede' cluster at TACC (https://docs.tacc.utexas.edu/hpc/stampede2/).", "notes" : "Always set the ``project`` attribute in the ComputePilotDescription or the pilot will fail.", - "schemas" : ["local", "gsissh", "ssh"], "mandatory_args" : ["project"], - "gsissh" : - { - "job_manager_endpoint" : "slurm+gsissh://stampede2.tacc.utexas.edu:2222/", - "filesystem_endpoint" : "gsisftp://stampede2.tacc.utexas.edu:2222/" - }, - "ssh" : - { - "job_manager_endpoint" : "slurm+ssh://stampede2.tacc.utexas.edu/", - "filesystem_endpoint" : "sftp://stampede2.tacc.utexas.edu/" - }, - "local" : - { - 
"job_manager_endpoint" : "slurm://stampede2.tacc.utexas.edu/", - "filesystem_endpoint" : "file://stampede2.tacc.utexas.edu/" + "default_schema" : "local", + "schemas" : { + "gsissh" : { + "job_manager_endpoint": "slurm+gsissh://stampede2.tacc.utexas.edu:2222/", + "filesystem_endpoint" : "gsisftp://stampede2.tacc.utexas.edu:2222/" + }, + "ssh" : { + "job_manager_endpoint": "slurm+ssh://stampede2.tacc.utexas.edu/", + "filesystem_endpoint" : "sftp://stampede2.tacc.utexas.edu/" + }, + "local" : { + "job_manager_endpoint": "slurm://stampede2.tacc.utexas.edu/", + "filesystem_endpoint" : "file://stampede2.tacc.utexas.edu/" + } }, "cores_per_node" : 68, "default_queue" : "normal", @@ -226,22 +222,21 @@ "stampede2_srun": { "description" : "The ACCESS 'Stampede' cluster at TACC (https://docs.tacc.utexas.edu/hpc/stampede2/).", "notes" : "Always set the ``project`` attribute in the PilotDescription or the pilot will fail.", - "schemas" : ["local", "gsissh", "ssh"], "mandatory_args" : ["project"], - "gsissh" : - { - "job_manager_endpoint" : "slurm+gsissh://stampede2.tacc.utexas.edu:2222/", - "filesystem_endpoint" : "gsisftp://stampede2.tacc.utexas.edu:2222/" - }, - "ssh" : - { - "job_manager_endpoint" : "slurm+ssh://stampede2.tacc.utexas.edu/", - "filesystem_endpoint" : "sftp://stampede2.tacc.utexas.edu/" - }, - "local" : - { - "job_manager_endpoint" : "slurm://stampede2.tacc.utexas.edu/", - "filesystem_endpoint" : "file://stampede2.tacc.utexas.edu/" + "default_schema" : "local", + "schemas" : { + "gsissh" : { + "job_manager_endpoint": "slurm+gsissh://stampede2.tacc.utexas.edu:2222/", + "filesystem_endpoint" : "gsisftp://stampede2.tacc.utexas.edu:2222/" + }, + "ssh" : { + "job_manager_endpoint": "slurm+ssh://stampede2.tacc.utexas.edu/", + "filesystem_endpoint" : "sftp://stampede2.tacc.utexas.edu/" + }, + "local" : { + "job_manager_endpoint": "slurm://stampede2.tacc.utexas.edu/", + "filesystem_endpoint" : "file://stampede2.tacc.utexas.edu/" + } }, "default_queue" : "normal", "resource_manager" : "SLURM", @@ -271,12 +266,13 @@ "comet": { "description" : "The retired Comet HPC resource at SDSC 'HPC for the 99%%' (https://www.sdsc.edu/support/user_guides/comet.html).", "notes" : "Always set the ``project`` attribute in the PilotDescription or the pilot will fail.", - "schemas" : ["ssh"], "mandatory_args" : ["project"], - "ssh" : - { - "job_manager_endpoint" : "slurm+ssh://comet.sdsc.xsede.org/", - "filesystem_endpoint" : "sftp://comet.sdsc.xsede.org/" + "default_schema" : "ssh", + "schemas" : { + "ssh" : { + "job_manager_endpoint": "slurm+ssh://comet.sdsc.xsede.org/", + "filesystem_endpoint" : "sftp://comet.sdsc.xsede.org/" + } }, "default_queue" : "compute", "lfs_path_per_node" : "/scratch/$USER/$SLURM_JOB_ID", @@ -306,27 +302,25 @@ "bridges2": { "description" : "The ACCESS 'Bridges2' cluster at PSC (https://www.psc.edu/resources/bridges-2/user-guide-2-2/).", "notes" : "Always set the ``project`` attribute in the PilotDescription.", - "schemas" : ["local", "interactive", "gsissh", "ssh"], # "mandatory_args" : [], - "local" : - { - "job_manager_endpoint" : "slurm://bridges2.psc.xsede.org/", - "filesystem_endpoint" : "file://bridges2.psc.xsede.org/" - }, - "interactive" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" - }, - "gsissh" : - { - "job_manager_endpoint" : "slurm+gsissh://bridges2.psc.xsede.org:2222/", - "filesystem_endpoint" : "gsisftp://bridges2.psc.xsede.org:2222/" - }, - "ssh" : - { - "job_manager_endpoint" : 
"slurm+ssh://bridges2.psc.xsede.org/", - "filesystem_endpoint" : "sftp://bridges2.psc.xsede.org/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://bridges2.psc.xsede.org/", + "filesystem_endpoint" : "file://bridges2.psc.xsede.org/" + }, + "interactive" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + }, + "gsissh" : { + "job_manager_endpoint": "slurm+gsissh://bridges2.psc.xsede.org:2222/", + "filesystem_endpoint" : "gsisftp://bridges2.psc.xsede.org:2222/" + }, + "ssh" : { + "job_manager_endpoint": "slurm+ssh://bridges2.psc.xsede.org/", + "filesystem_endpoint" : "sftp://bridges2.psc.xsede.org/" + } }, "default_queue" : "RM", "resource_manager" : "SLURM", diff --git a/src/radical/pilot/configs/resource_anl.json b/src/radical/pilot/configs/resource_anl.json index ab3ed3420c..4b3c038196 100644 --- a/src/radical/pilot/configs/resource_anl.json +++ b/src/radical/pilot/configs/resource_anl.json @@ -3,12 +3,13 @@ "theta": { "description" : "Cray XC40, 4392 nodes (Intel KNL 7230)", "notes" : "Local instance of MongoDB and pre-set VE should be used.", - "schemas" : ["local"], - "local" : - { - "job_manager_hop" : "cobalt://localhost/", - "job_manager_endpoint" : "cobalt://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_hop" : "cobalt://localhost/", + "job_manager_endpoint": "cobalt://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_queue" : "debug-flat-quad", "resource_manager" : "COBALT", @@ -33,12 +34,13 @@ "theta_gpu": { "description" : "Extension of Theta, 24 NVIDIA DGX A100 nodes", "notes" : "Local instance of MongoDB and pre-set VE should be used.", - "schemas" : ["local"], - "local" : - { - "job_manager_hop" : "cobalt://localhost/", - "job_manager_endpoint" : "cobalt://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_hop" : "cobalt://localhost/", + "job_manager_endpoint": "cobalt://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_queue" : "full-node", "resource_manager" : "COBALT", @@ -66,11 +68,12 @@ "polaris": { "description" : "AMD EPYC Milan 7543P 32 core CPU with four Nvidia A100 GPUs, 560 nodes", "notes" : "Local instance of MongoDB and pre-set VE should be used.", - "schemas" : ["local"], - "local" : - { - "job_manager_endpoint" : "pbspro://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "pbspro://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_queue" : "debug-scaling", "resource_manager" : "PBSPRO", @@ -95,11 +98,12 @@ "polaris_interactive": { "description" : "AMD EPYC Milan 7543P 32 core CPU with four Nvidia A100 GPUs, 560 nodes", "notes" : "Local instance of MongoDB and pre-set VE should be used.", - "schemas" : ["interactive"], - "interactive" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "interactive", + "schemas" : { + "interactive" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "resource_manager" : "PBSPRO", "agent_config" : "default", @@ -122,11 +126,13 @@ "arcticus": { "description" : "JLSE Aurora testbed; 17x Coyote Pass nodes, 2x XeHP_SDV", "notes" : "Duo two-factor login. 
Local instance of virtualenv should be used.", - "schemas" : [ "local" ], - "local" : { - "job_manager_hop" : "cobalt://localhost/", - "job_manager_endpoint" : "cobalt://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_hop" : "cobalt://localhost/", + "job_manager_endpoint": "cobalt://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, # "forward_tunnel_endpoint" : "jlselogin5", "default_queue" : "full-node", diff --git a/src/radical/pilot/configs/resource_debug.json b/src/radical/pilot/configs/resource_debug.json index 126c9dd101..4af9becf3c 100644 --- a/src/radical/pilot/configs/resource_debug.json +++ b/src/radical/pilot/configs/resource_debug.json @@ -3,11 +3,13 @@ "local": { "description" : "", "notes" : "", - "schemas" : ["local"], - "local" : { - "job_manager_hop" : "fork://localhost/", - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_hop" : "fork://localhost/", + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_queue" : "", "resource_manager" : "FORK", @@ -35,16 +37,16 @@ "test": { "description" : "Your local machine.", "notes" : "", - "schemas" : ["local", "ssh"], - "ssh" : - { - "job_manager_endpoint" : "ssh://localhost/", - "filesystem_endpoint" : "sftp://localhost/" - }, - "local" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "ssh" : { + "job_manager_endpoint": "ssh://localhost/", + "filesystem_endpoint" : "sftp://localhost/" + }, + "local" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "pre_bootstrap_1" : [ "export RP_APP_TUNNEL_ADDR=144.76.72.175:27017", @@ -75,16 +77,16 @@ "flux": { "description" : "", "notes" : "", - "schemas" : ["local", "ssh"], - "ssh" : - { - "job_manager_endpoint" : "ssh://localhost/", - "filesystem_endpoint" : "sftp://localhost/" - }, - "local" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "ssh" : { + "job_manager_endpoint": "ssh://localhost/", + "filesystem_endpoint" : "sftp://localhost/" + }, + "local" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_remote_workdir" : "$HOME", "resource_manager" : "FORK", diff --git a/src/radical/pilot/configs/resource_llnl.json b/src/radical/pilot/configs/resource_llnl.json index d46eaf5fc4..841c0792cc 100644 --- a/src/radical/pilot/configs/resource_llnl.json +++ b/src/radical/pilot/configs/resource_llnl.json @@ -2,12 +2,13 @@ "lassen": { "description" : "Unclassified Sierra system (arch: IBM Power9, NVIDIA TeslaV100)", "notes" : "A dedicated local instance of MongoDB should be used", - "schemas" : ["local"], - "local" : - { - "job_manager_hop" : "fork://localhost/", - "job_manager_endpoint" : "lsf://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_hop" : "fork://localhost/", + "job_manager_endpoint": "lsf://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, # "forward_tunnel_endpoint" : "`hostname -f`", "default_queue" : "pbatch", diff --git a/src/radical/pilot/configs/resource_local.json 
b/src/radical/pilot/configs/resource_local.json index 489cd0166c..c93cb8412c 100644 --- a/src/radical/pilot/configs/resource_local.json +++ b/src/radical/pilot/configs/resource_local.json @@ -3,16 +3,16 @@ "localhost": { "description" : "Your local machine.", "notes" : "To use the ssh schema, make sure that ssh access to localhost is enabled.", - "schemas" : ["local", "ssh"], - "ssh" : - { - "job_manager_endpoint" : "ssh://localhost/", - "filesystem_endpoint" : "sftp://localhost/" - }, - "local" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "ssh" : { + "job_manager_endpoint": "ssh://localhost/", + "filesystem_endpoint" : "sftp://localhost/" + }, + "local" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_remote_workdir" : "$HOME", "resource_manager" : "FORK", @@ -41,11 +41,12 @@ "localhost_test": { "description" : "Your local machine.", "notes" : "To use the ssh schema, make sure that ssh access to localhost is enabled.", - "schemas" : ["local"], - "local" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_remote_workdir" : "$HOME", "resource_manager" : "FORK", @@ -71,16 +72,16 @@ "localhost_anaconda": { "description" : "Your local machine.", "notes" : "To use the ssh schema, make sure that ssh access to localhost is enabled.", - "schemas" : ["local", "ssh"], - "ssh" : - { - "job_manager_endpoint" : "ssh://localhost/", - "filesystem_endpoint" : "sftp://localhost/" - }, - "local" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "ssh" : { + "job_manager_endpoint": "ssh://localhost/", + "filesystem_endpoint" : "sftp://localhost/" + }, + "local" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_remote_workdir" : "$HOME", "resource_manager" : "FORK", diff --git a/src/radical/pilot/configs/resource_ncar.json b/src/radical/pilot/configs/resource_ncar.json index ee8f0a2b15..e0649544e5 100644 --- a/src/radical/pilot/configs/resource_ncar.json +++ b/src/radical/pilot/configs/resource_ncar.json @@ -3,17 +3,18 @@ "cheyenne": { "description" : "An SGI ICE XA Cluster located at the National Center for Atmospheric Research (NCAR), (https://www2.cisl.ucar.edu/resources/computational-systems/cheyenne)", "notes" : "Requires the use of a token from an USB on every connection.", - "schemas" : ["local", "ssh"], - "local" : { - "job_manager_hop" : "fork://localhost/", - "job_manager_endpoint" : "pbspro://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_hop" : "fork://localhost/", + "job_manager_endpoint": "pbspro://localhost/", + "filesystem_endpoint" : "file://localhost/" + }, + "ssh" : { + "job_manager_endpoint": "ssh+pbspro://cheyenne.ucar.edu/", + "filesystem_endpoint" : "file://cheyenne.ucar.edu/" + } }, - "ssh" : { - "job_manager_endpoint" : "ssh+pbspro://cheyenne.ucar.edu/", - "filesystem_endpoint" : "file://cheyenne.ucar.edu/" - }, - "default_queue" : "regular", "resource_manager" : "PBSPRO", "cores_per_node" : 36, @@ -50,15 +51,17 @@ "cheyenne_mpt": { "description" : "An SGI ICE XA 
Cluster located at the National Center for Atmospheric Research (NCAR), (https://www2.cisl.ucar.edu/resources/computational-systems/cheyenne)", "notes" : "Requires the use of a token from an USB on every connection.", - "schemas" : ["local", "ssh"], - "local" : { - "job_manager_hop" : "fork://localhost/", - "job_manager_endpoint" : "pbspro://localhost/", - "filesystem_endpoint" : "file://localhost/" - }, - "ssh" : { - "job_manager_endpoint" : "ssh+pbspro://cheyenne.ucar.edu/", - "filesystem_endpoint" : "file://cheyenne.ucar.edu/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_hop" : "fork://localhost/", + "job_manager_endpoint": "pbspro://localhost/", + "filesystem_endpoint" : "file://localhost/" + }, + "ssh" : { + "job_manager_endpoint": "ssh+pbspro://cheyenne.ucar.edu/", + "filesystem_endpoint" : "file://cheyenne.ucar.edu/" + } }, "default_queue" : "regular", "resource_manager" : "PBSPRO", diff --git a/src/radical/pilot/configs/resource_ncsa.json b/src/radical/pilot/configs/resource_ncsa.json index 2948a402cf..6e2121d364 100644 --- a/src/radical/pilot/configs/resource_ncsa.json +++ b/src/radical/pilot/configs/resource_ncsa.json @@ -2,18 +2,21 @@ "delta": { "description" : "132 standard CPU (AMD EPYC 7763) nodes", - "schemas" : ["local", "batch", "interactive"], - "local" : - { - "job_manager_endpoint" : "slurm://delta.ncsa.illinois.edu/", - "filesystem_endpoint" : "file://delta.ncsa.illinois.edu/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://delta.ncsa.illinois.edu/", + "filesystem_endpoint" : "file://delta.ncsa.illinois.edu/" + }, + "batch" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + }, + "interactive" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, - "batch" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" - }, - "interactive" : "batch", "default_queue" : "cpu", "cores_per_node" : 128, "resource_manager" : "SLURM", @@ -34,18 +37,21 @@ "delta_gpu_a40": { "description" : "100 4-way A40-based GPU nodes", - "schemas" : ["local", "batch", "interactive"], - "local" : - { - "job_manager_endpoint" : "slurm://delta.ncsa.illinois.edu/", - "filesystem_endpoint" : "file://delta.ncsa.illinois.edu/" - }, - "batch" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://delta.ncsa.illinois.edu/", + "filesystem_endpoint" : "file://delta.ncsa.illinois.edu/" + }, + "batch" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + }, + "interactive" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, - "interactive" : "batch", "default_queue" : "gpuA40x4", "cores_per_node" : 64, "gpus_per_node" : 4, @@ -67,18 +73,21 @@ "delta_gpu_a100_4way": { "description" : "100 4-way A100-based GPU nodes", - "schemas" : ["local", "batch", "interactive"], - "local" : - { - "job_manager_endpoint" : "slurm://delta.ncsa.illinois.edu/", - "filesystem_endpoint" : "file://delta.ncsa.illinois.edu/" - }, - "batch" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://delta.ncsa.illinois.edu/", + "filesystem_endpoint" : 
"file://delta.ncsa.illinois.edu/" + }, + "batch" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + }, + "interactive" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, - "interactive" : "batch", "default_queue" : "gpuA100x4", "cores_per_node" : 64, "gpus_per_node" : 4, @@ -100,18 +109,21 @@ "delta_gpu_a100_8way": { "description" : "6 8-way A100-based GPU nodes", - "schemas" : ["local", "batch", "interactive"], - "local" : - { - "job_manager_endpoint" : "slurm://delta.ncsa.illinois.edu/", - "filesystem_endpoint" : "file://delta.ncsa.illinois.edu/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://delta.ncsa.illinois.edu/", + "filesystem_endpoint" : "file://delta.ncsa.illinois.edu/" + }, + "batch" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + }, + "interactive" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, - "batch" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" - }, - "interactive" : "batch", "default_queue" : "gpuA100x8", "cores_per_node" : 128, "gpus_per_node" : 8, @@ -133,18 +145,21 @@ "delta_gpu_mi100": { "description" : "1 8-way MI100-based GPU node", - "schemas" : ["local", "batch", "interactive"], - "local" : - { - "job_manager_endpoint" : "slurm://delta.ncsa.illinois.edu/", - "filesystem_endpoint" : "file://delta.ncsa.illinois.edu/" - }, - "batch" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://delta.ncsa.illinois.edu/", + "filesystem_endpoint" : "file://delta.ncsa.illinois.edu/" + }, + "batch" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + }, + "interactive" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, - "interactive" : "batch", "default_queue" : "gpuMI100x8", "cores_per_node" : 128, "gpus_per_node" : 8, diff --git a/src/radical/pilot/configs/resource_nersc.json b/src/radical/pilot/configs/resource_nersc.json index 757b68808c..fa7365bfb1 100644 --- a/src/radical/pilot/configs/resource_nersc.json +++ b/src/radical/pilot/configs/resource_nersc.json @@ -2,17 +2,22 @@ { "perlmutter": { "description" : "CPU nodes: 3072", - "schemas" : ["local", "interactive", "batch"], - "local" : { - "job_manager_endpoint" : "slurm://perlmutter-p1.nersc.gov/", - "filesystem_endpoint" : "file://perlmutter-p1.nersc.gov/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://perlmutter-p1.nersc.gov/", + "filesystem_endpoint" : "file://perlmutter-p1.nersc.gov/" + }, + "batch" : + { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + }, + "interactive" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, - "batch" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" - }, - "interactive" : "batch", "default_queue" : "regular", "resource_manager" : "SLURM", "agent_scheduler" : "CONTINUOUS", @@ -36,17 +41,21 @@ "perlmutter_gpu": { "description" : "GPU nodes: 1536 with 40GiB and 256 with 80GiB of GPU-attached memory", - "schemas" : ["local", "interactive", "batch"], - 
"local" : { - "job_manager_endpoint" : "slurm://perlmutter-p1.nersc.gov/", - "filesystem_endpoint" : "file://perlmutter-p1.nersc.gov/" - }, - "batch" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://perlmutter-p1.nersc.gov/", + "filesystem_endpoint" : "file://perlmutter-p1.nersc.gov/" + }, + "batch" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + }, + "interactive" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, - "interactive" : "batch", "default_queue" : "regular", "resource_manager" : "SLURM", "agent_scheduler" : "CONTINUOUS", diff --git a/src/radical/pilot/configs/resource_ornl.json b/src/radical/pilot/configs/resource_ornl.json index 42e7e3ffc8..71926109fd 100644 --- a/src/radical/pilot/configs/resource_ornl.json +++ b/src/radical/pilot/configs/resource_ornl.json @@ -3,10 +3,12 @@ "andes": { "description" : "704 compute nodes", "notes" : "Requires RSA SecurID and uses local virtual env", - "schemas" : ["local"], - "local" : { - "job_manager_endpoint" : "slurm://andes.olcf.ornl.gov/", - "filesystem_endpoint" : "file://andes.olcf.ornl.gov/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://andes.olcf.ornl.gov/", + "filesystem_endpoint" : "file://andes.olcf.ornl.gov/" + } }, "forward_tunnel_endpoint" : "andes.olcf.ornl.gov", "default_queue" : "batch", @@ -28,10 +30,12 @@ "andes_gpu": { "description" : "9 gpu nodes", "notes" : "Requires RSA SecurID and uses local virtual env", - "schemas" : ["local"], - "local" : { - "job_manager_endpoint" : "slurm://andes.olcf.ornl.gov/", - "filesystem_endpoint" : "file://andes.olcf.ornl.gov/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://andes.olcf.ornl.gov/", + "filesystem_endpoint" : "file://andes.olcf.ornl.gov/" + } }, "forward_tunnel_endpoint" : "andes.olcf.ornl.gov", "default_queue" : "gpu", @@ -55,10 +59,12 @@ "crusher": { "description" : "2 cabinets: (1) 128 compute nodes; (2) 64 compute nodes", "notes" : "Requires RSA SecurID and uses prepared local virtual env", - "schemas" : ["local"], - "local" : { - "job_manager_endpoint" : "slurm://crusher.olcf.ornl.gov/", - "filesystem_endpoint" : "file://crusher.olcf.ornl.gov/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://crusher.olcf.ornl.gov/", + "filesystem_endpoint" : "file://crusher.olcf.ornl.gov/" + } }, "default_queue" : "batch", "resource_manager" : "SLURM", @@ -91,10 +97,12 @@ "frontier": { "description" : "74 cabinets: 128 compute nodes", "notes" : "Requires RSA SecurID and uses prepared local virtual env", - "schemas" : ["local"], - "local" : { - "job_manager_endpoint" : "slurm://frontier.olcf.ornl.gov/", - "filesystem_endpoint" : "file://frontier.olcf.ornl.gov/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://frontier.olcf.ornl.gov/", + "filesystem_endpoint" : "file://frontier.olcf.ornl.gov/" + } }, "default_queue" : "batch", "resource_manager" : "SLURM", @@ -123,10 +131,12 @@ "spock": { "description" : "3 cabinets: each containing 12 compute nodes", "notes" : "", - "schemas" : ["local"], - "local" : { - "job_manager_endpoint" : "slurm://spock.olcf.ornl.gov/", - "filesystem_endpoint" : "file://spock.olcf.ornl.gov/" + "default_schema" : "local", + "schemas" : 
{ + "local" : { + "job_manager_endpoint": "slurm://spock.olcf.ornl.gov/", + "filesystem_endpoint" : "file://spock.olcf.ornl.gov/" + } }, "default_queue" : "ecp", "resource_manager" : "SLURM", @@ -159,11 +169,13 @@ "summit": { "description" : "4608 nodes with 2 IBM POWER9 CPUs and 6 NVIDIA Volta V100 GPUs", "notes" : "Requires RSA SecurID and uses local virtual env", - "schemas" : ["local"], - "local" : { - "job_manager_hop" : "fork://localhost/", - "job_manager_endpoint" : "lsf://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_hop" : "fork://localhost/", + "job_manager_endpoint": "lsf://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_queue" : "batch", "resource_manager" : "LSF", @@ -202,11 +214,13 @@ "summit_jsrun": { "description" : "4608 nodes with 2 IBM POWER9 CPUs and 6 NVIDIA Volta V100 GPUs", "notes" : "Requires RSA SecurID and uses local virtual env", - "schemas" : ["local"], - "local" : { - "job_manager_hop" : "fork://localhost/", - "job_manager_endpoint" : "lsf://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_hop" : "fork://localhost/", + "job_manager_endpoint": "lsf://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_queue" : "batch", "resource_manager" : "LSF", @@ -249,10 +263,12 @@ "summit_interactive": { "description" : "4608 nodes with 2 IBM POWER9 CPUs and 6 NVIDIA Volta V100 GPUs", "notes" : "interactive job https://docs.olcf.ornl.gov/systems/summit_user_guide.html", - "schemas" : ["interactive"], - "interactive" : { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "interactive", + "schemas" : { + "interactive" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "resource_manager" : "LSF", "agent_config" : "default", @@ -290,11 +306,13 @@ "summit_prte": { "description" : "4608 nodes with 2 IBM POWER9 CPUs and 6 NVIDIA Volta V100 GPUs", "notes" : "Requires RSA SecurID and uses local virtual env", - "schemas" : ["local"], - "local" : { - "job_manager_hop" : "fork://localhost/", - "job_manager_endpoint" : "lsf://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_hop" : "fork://localhost/", + "job_manager_endpoint": "lsf://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_queue" : "batch", "resource_manager" : "LSF", diff --git a/src/radical/pilot/configs/resource_princeton.json b/src/radical/pilot/configs/resource_princeton.json index 2af22c45b5..70ecc39827 100644 --- a/src/radical/pilot/configs/resource_princeton.json +++ b/src/radical/pilot/configs/resource_princeton.json @@ -3,13 +3,14 @@ "traverse": { "description" : "", "notes" : "", - "schemas" : ["local"], "mandatory_args" : [], - "local" : - { - "job_manager_endpoint" : "slurm://traverse.princeton.edu/", - "job_manager_hop" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://traverse.princeton.edu/", + "job_manager_hop" : "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_queue" : "test", "resource_manager" : "SLURM", @@ -40,13 +41,14 @@ "traverse_mpirun": { "description" : "", "notes" : "", - "schemas" : ["local"], "mandatory_args" : 
[], - "local" : - { - "job_manager_endpoint" : "slurm://traverse.princeton.edu/", - "job_manager_hop" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://traverse.princeton.edu/", + "job_manager_hop" : "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_queue" : "test", "resource_manager" : "SLURM", @@ -75,13 +77,14 @@ "tiger_cpu": { "description" : "", "notes" : "", - "schemas" : ["local", "ssh"], "mandatory_args" : [], - "local" : - { - "job_manager_endpoint" : "slurm://tigercpu.princeton.edu/", - "job_manager_hop" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://tigercpu.princeton.edu/", + "job_manager_hop" : "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "ssh" : { @@ -123,13 +126,14 @@ "tiger_gpu": { "description" : "", "notes" : "", - "schemas" : ["local", "ssh"], "mandatory_args" : [], - "local" : - { - "job_manager_endpoint" : "slurm://tigercpu.princeton.edu/", - "job_manager_hop" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://tigercpu.princeton.edu/", + "job_manager_hop" : "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "ssh" : { diff --git a/src/radical/pilot/configs/resource_rutgers.json b/src/radical/pilot/configs/resource_rutgers.json index 86d7f85730..299a4bcbd3 100644 --- a/src/radical/pilot/configs/resource_rutgers.json +++ b/src/radical/pilot/configs/resource_rutgers.json @@ -3,21 +3,20 @@ { "description" : "Heterogeneous community-model Linux cluster", "notes" : "Access from registered IP address", - "schemas" : ["local", "ssh", "interactive"], - "local" : - { - "job_manager_endpoint" : "slurm://amarel.rutgers.edu/", - "filesystem_endpoint" : "file://amarel.rutgers.edu/" - }, - "ssh" : - { - "job_manager_endpoint" : "slurm+ssh://amarel.rutgers.edu/", - "filesystem_endpoint" : "sftp://amarel.rutgers.edu/" - }, - "interactive" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://amarel.rutgers.edu/", + "filesystem_endpoint" : "file://amarel.rutgers.edu/" + }, + "ssh" : { + "job_manager_endpoint": "slurm+ssh://amarel.rutgers.edu/", + "filesystem_endpoint" : "sftp://amarel.rutgers.edu/" + }, + "interactive" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_queue" : "main", "resource_manager" : "SLURM", diff --git a/src/radical/pilot/configs/resource_tacc.json b/src/radical/pilot/configs/resource_tacc.json index 734081b445..398c6beef4 100644 --- a/src/radical/pilot/configs/resource_tacc.json +++ b/src/radical/pilot/configs/resource_tacc.json @@ -4,22 +4,21 @@ "frontera": { "description" : "Petascale computing system at the Texas Advanced Computing Center (TACC)", "notes" : "Always launch RP from a login node or within a compute (interactive) node if you do not have a waiver from TACC for an external IP address", - "schemas" : ["local", "ssh", "interactive"], "mandatory_args" : ["project"], - "local" : - { - "job_manager_endpoint" : "slurm://frontera.tacc.utexas.edu/", - "filesystem_endpoint" : "file://frontera.tacc.utexas.edu/" - }, - "ssh" : - { - 
"job_manager_endpoint" : "slurm+ssh://frontera.tacc.utexas.edu/", - "filesystem_endpoint" : "sftp://frontera.tacc.utexas.edu/" - }, - "interactive" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://frontera.tacc.utexas.edu/", + "filesystem_endpoint" : "file://frontera.tacc.utexas.edu/" + }, + "ssh" : { + "job_manager_endpoint": "slurm+ssh://frontera.tacc.utexas.edu/", + "filesystem_endpoint" : "sftp://frontera.tacc.utexas.edu/" + }, + "interactive" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_queue" : "normal", "resource_manager" : "SLURM", @@ -57,17 +56,17 @@ "frontera_rtx": { "description" : "Petascale computing system at the Texas Advanced Computing Center (TACC)", "notes" : "Always launch RP from a login node if you do not have a waiver from TACC for an external IP address", - "schemas" : ["local", "ssh"], "mandatory_args" : ["project"], - "ssh" : - { - "job_manager_endpoint" : "slurm+ssh://frontera.tacc.utexas.edu/", - "filesystem_endpoint" : "sftp://frontera.tacc.utexas.edu/" - }, - "local" : - { - "job_manager_endpoint" : "slurm://frontera.tacc.utexas.edu/", - "filesystem_endpoint" : "file://frontera.tacc.utexas.edu/" + "default_schema" : "local", + "schemas" : { + "ssh" : { + "job_manager_endpoint": "slurm+ssh://frontera.tacc.utexas.edu/", + "filesystem_endpoint" : "sftp://frontera.tacc.utexas.edu/" + }, + "local" : { + "job_manager_endpoint": "slurm://frontera.tacc.utexas.edu/", + "filesystem_endpoint" : "file://frontera.tacc.utexas.edu/" + } }, "default_queue" : "rtx", "resource_manager" : "SLURM", @@ -100,17 +99,17 @@ "frontera_prte": { "description" : "Petascale computing system at the Texas Advanced Computing Center (TACC)", "notes" : "Always launch RP from a login node if you do not have a waiver from TACC for an external IP address", - "schemas" : ["local", "ssh"], "mandatory_args" : ["project"], - "ssh" : - { - "job_manager_endpoint" : "slurm+ssh://frontera.tacc.utexas.edu/", - "filesystem_endpoint" : "sftp://frontera.tacc.utexas.edu/" - }, - "local" : - { - "job_manager_endpoint" : "slurm://frontera.tacc.utexas.edu/", - "filesystem_endpoint" : "file://frontera.tacc.utexas.edu/" + "default_schema" : "local", + "schemas" : { + "ssh" : { + "job_manager_endpoint": "slurm+ssh://frontera.tacc.utexas.edu/", + "filesystem_endpoint" : "sftp://frontera.tacc.utexas.edu/" + }, + "local" : { + "job_manager_endpoint": "slurm://frontera.tacc.utexas.edu/", + "filesystem_endpoint" : "file://frontera.tacc.utexas.edu/" + } }, "default_queue" : "normal", "resource_manager" : "SLURM", diff --git a/src/radical/pilot/configs/resource_uva.json b/src/radical/pilot/configs/resource_uva.json index acd760e322..adacac899d 100644 --- a/src/radical/pilot/configs/resource_uva.json +++ b/src/radical/pilot/configs/resource_uva.json @@ -3,21 +3,20 @@ { "description" : "Heterogeneous community-model Linux cluster", "notes" : "Access from registered UVA IP address. 
See https://www.rc.virginia.edu/userinfo/rivanna/login/", - "schemas" : ["local", "ssh", "interactive"], - "local" : - { - "job_manager_endpoint" : "slurm://rivanna.hpc.virginia.edu/", - "filesystem_endpoint" : "file://rivanna.hpc.virginia.edu/" - }, - "ssh" : - { - "job_manager_endpoint" : "slurm+ssh://rivanna.hpc.virginia.edu/", - "filesystem_endpoint" : "sftp://rivanna.hpc.virginia.edu/" - }, - "interactive" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "slurm://rivanna.hpc.virginia.edu/", + "filesystem_endpoint" : "file://rivanna.hpc.virginia.edu/" + }, + "ssh" : { + "job_manager_endpoint": "slurm+ssh://rivanna.hpc.virginia.edu/", + "filesystem_endpoint" : "sftp://rivanna.hpc.virginia.edu/" + }, + "interactive" : { + "job_manager_endpoint": "fork://localhost/", + "filesystem_endpoint" : "file://localhost/" + } }, "default_queue" : "standard", "resource_manager" : "SLURM", diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index f344d07f18..acb72996a9 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -14,6 +14,8 @@ from .db import DBSession from . import utils as rpu +from .resource_description import ResourceDescription + # ------------------------------------------------------------------------------ # @@ -91,14 +93,28 @@ def __init__(self, dburl=None, uid=None, cfg=None, _primary=True, self._cmgr = None # only primary sessions have a cmgr self._cfg = ru.Config('radical.pilot.session', name=name, cfg=cfg) - self._rcfgs = ru.Config('radical.pilot.resource', name='*', expand=False) + self._rcfgs = ru.Config() + + rcfgs = ru.Config('radical.pilot.resource', name='*', expand=False) + + for site in rcfgs: + self._rcfgs[site] = ru.Config() + for res,rcfg in rcfgs[site].items(): + self._rcfgs[site][res] = ru.Config() + for schema in rcfg['schemas']: + self._rcfgs[site][res][schema] = ru.Config() + self._rcfgs[site][res][schema] = ru.Config( + from_dict=rcfgs[site][res]) + ru.dict_merge(self._rcfgs[site][res][schema], + rcfgs[site][res]['schemas'][schema]) + del self._rcfgs[site][res][schema]['schemas'] for site in self._rcfgs: - for rcfg in self._rcfgs[site].values(): + for res,rcfg in self._rcfgs[site].items(): for schema in rcfg.get('schemas', []): - while isinstance(rcfg.get(schema), str): - tgt = rcfg[schema] - rcfg[schema] = rcfg[tgt] + rd = ResourceDescription(from_dict=rcfg['schemas'][schema]) + rd.verify() + if _primary: From 2e4e96283a904e2d653af950d7d685d05b634935 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Fri, 4 Aug 2023 16:03:36 +0200 Subject: [PATCH 079/171] service startup snapshot --- examples/agent_services.py | 9 +- setup.py | 1 + src/radical/pilot/agent/agent_0.py | 164 ++++++++++++++++++++--------- src/radical/pilot/session.py | 2 +- 4 files changed, 124 insertions(+), 52 deletions(-) diff --git a/examples/agent_services.py b/examples/agent_services.py index f854af6536..a0ce10a51d 100755 --- a/examples/agent_services.py +++ b/examples/agent_services.py @@ -39,7 +39,10 @@ report.header('submit pilots') - # Add a PilotManager. PilotManagers manage one or more pilots. 
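+    # NOTE: the agent blocks its own startup until every requested service
+    # signals readiness - the `radical-pilot-service-signal` call used below
+    # publishes a `service_up` message on the agent's control channel for
+    # exactly that purpose.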
+ # Also define a (dummy) service to be run by the pilot + sd = rp.TaskDescription({'executable': '/bin/sh', + 'arguments' : ['-c', 'radical-pilot-service-signal'], + 'named_env' : 'rp'}) # Define an [n]-core local pilot that runs for [x] minutes # Here we use a dict to initialize the description object @@ -51,9 +54,7 @@ 'access_schema' : config.get('schema'), 'cores' : config.get('cores', 1), 'gpus' : config.get('gpus', 0), - # TODO create shell script - 'services' :[rp.TaskDescription({'executable':'free -h'}), - rp.TaskDescription({'executable':'free -h'}) ] + 'services' : [sd, sd] } pdesc = rp.PilotDescription(pd_init) diff --git a/setup.py b/setup.py index 78e1906742..60ae178717 100755 --- a/setup.py +++ b/setup.py @@ -266,6 +266,7 @@ def run(self): 'bin/radical-pilot-raptor-worker', 'bin/radical-pilot-resources', 'bin/radical-pilot-run-session', + 'bin/radical-pilot-service-signal', 'bin/radical-pilot-stats', 'bin/radical-pilot-stats.plot', 'bin/radical-pilot-ve', diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index aca44ca16a..fbc05c79ba 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -71,9 +71,6 @@ def __init__(self): # ensure that app communication channels are visible to workload self._configure_app_comm() - # start any services if they are requested - self._start_services() - # create the sub-agent configs and start the sub agents self._write_sa_configs() self._start_sub_agents() # TODO: move to cmgr? @@ -219,6 +216,22 @@ def initialize(self): self.register_output(rps.TMGR_STAGING_OUTPUT_PENDING, rpc.PROXY_TASK_QUEUE) + # subscribe for control messages + ru.zmq.Subscriber(channel=rpc.CONTROL_PUBSUB, cb=self._control_cb, + url=self._reg['bridges.%s.addr_sub' % rpc.CONTROL_PUBSUB]) + + + if True: + time.sleep(1) + reg = ru.zmq.RegistryClient(url=self._reg._url) + pub = ru.zmq.Publisher('control_pubsub', reg['bridges.control_pubsub.addr_pub']) + + pub.put('control_pubsub', msg={'cmd': 'service_up', + 'uid': 'test.1'}) + + # make sure the message goes out + time.sleep(1) + # before we run any tasks, prepare a named_env `rp` for tasks which use # the pilot's own environment, such as raptors env_spec = {'type' : os.environ['RP_VENV_TYPE'], @@ -230,6 +243,9 @@ def initialize(self): } self._prepare_env('rp', env_spec) + # start any services if they are requested + self._start_services() + # sub-agents are started, components are started, bridges are up: we are # ready to roll! Send state update rm_info = self._rm.info @@ -348,15 +364,16 @@ def _write_sa_configs(self): # def _start_services(self): - service_descriptions = self._cfg.services - if not service_descriptions: + sds = self._cfg.services + if not sds: return + self._log.info('starting agent services') services = list() - for service_desc in service_descriptions: + for sd in sds: - td = TaskDescription(service_desc) + td = TaskDescription(sd) td.mode = AGENT_SERVICE # ensure that the description is viable td.verify() @@ -383,6 +400,9 @@ def _start_services(self): self._service_uids_launched.append(tid) services.append(task) + self._log.debug('start service %s: %s', tid, sd) + + self.advance(services, publish=False, push=True) # Waiting 2mins for all services to launch @@ -570,7 +590,7 @@ def _control_cb(self, _, msg): requests to handle. 
'''

-        self._log.debug('control: %s', msg)
+        self._log.debug('==== control: %s', msg)

         cmd = msg['cmd']
         arg = msg['arg']
@@ -580,13 +600,11 @@

         if cmd == 'pmgr_heartbeat' and arg['pmgr'] == self._pmgr:
-
             self._session._hb.beat(uid=self._pmgr)
             return True

         elif cmd == 'prep_env':
-
             env_spec = arg
             for env_id in env_spec:
                 self._prepare_env(env_id, env_spec[env_id])
@@ -594,57 +612,109 @@

         elif cmd == 'cancel_pilots':
+            return self._ctrl_cancel_pilots(msg)

-            if self._pid not in arg.get('uids'):
-                self._log.debug('ignore cancel %s', msg)
+        elif cmd == 'rpc_req':
+            return self._ctrl_rpc_req(msg)

-            self._log.info('cancel pilot cmd')
-            self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'terminate',
-                                              'arg' : None})
-            self._final_cause = 'cancel'
-            self.stop()
+        elif cmd == 'service_up':
+            return self._ctrl_service_up(msg)

-            # work is done - unregister this cb
-            return False
+        return True

-        elif cmd == 'rpc_req':
+
+    # --------------------------------------------------------------------------
+    #
+    def _ctrl_cancel_pilots(self, msg):
+
+        arg = msg['arg']
+
+        if self._pid not in arg.get('uids'):
+            self._log.debug('ignore cancel %s', msg)
+            return True
+
+        self._log.info('cancel pilot cmd')
+        self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'terminate',
+                                          'arg' : None})
+        self._final_cause = 'cancel'
+        self.stop()

-            req = arg['rpc']
-            if req not in ['hello', 'prepare_env']:
-                # we don't handle that request
+        # work is done - unregister this cb
+        return False
+
+
+    # --------------------------------------------------------------------------
+    #
+    def _ctrl_rpc_req(self, msg):
+
+        cmd = msg['cmd']
+        arg = msg['arg']
+        req = arg['rpc']
+
+        if req not in ['hello', 'prepare_env']:
+            # we don't handle that request
+            return True
+
+        rpc_res = {'uid': arg['uid']}
+
+        try:
+            if req == 'hello'   :
+                out = 'hello %s' % ' '.join(arg['arg'])
+
+            elif req == 'prepare_env':
+                env_name = arg['arg']['env_name']
+                env_spec = arg['arg']['env_spec']
+                out      = self._prepare_env(env_name, env_spec)
+
+            else:
+                # unknown command
                 return True

-            rpc_res = {'uid': arg['uid']}
+            # request succeeded - respond with return value
+            rpc_res['err'] = None
+            rpc_res['out'] = out
+            rpc_res['ret'] = 0
+
+        except Exception as e:
+            # request failed for some reason - indicate error
+            rpc_res['err'] = repr(e)
+            rpc_res['out'] = None
+            rpc_res['ret'] = 1
+            self._log.exception('control cmd failed')
+
+        # publish the response (success or failure)
+        self.publish(rpc.CONTROL_PUBSUB, {'cmd': 'rpc_res',
+                                          'arg': rpc_res})

-            try:
-                if req == 'hello'   :
-                    out = 'hello %s' % ' '.join(arg['arg'])

-                elif req == 'prepare_env':
-                    env_name = arg['arg']['env_name']
-                    env_spec = arg['arg']['env_spec']
-                    out      = self._prepare_env(env_name, env_spec)
+    # --------------------------------------------------------------------------
+    #
+    def _ctrl_service_up(self, msg):

-                else:
-                    # unknown command
-                    return True
+        cmd = msg['cmd']
+        uid = msg['arg']['uid']
+
+        # This message signals that an agent service instance is up and running.
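+        # A sketch of the assumed message layout (inferred from the lookups
+        # above; the `radical-pilot-service-signal` helper is expected to
+        # publish this shape):
+        #
+        #     {'cmd': 'service_up', 'arg': {'uid': '<service task uid>'}}
+        #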
+        # We expect to find the service UID in args and can then unblock the
+        # service startup wait for that uid
+
+        if uid not in self._service_uids_launched:
+            # we do not know this service instance
+            self._log.warn('=== ignore service startup signal for %s', uid)
+            return True
+
+        if uid in self._service_uids_running:
+            self._log.warn('=== duplicated service startup signal for %s', uid)
+            return True

-            # request succeeded - respond with return value
-            rpc_res['err'] = None
-            rpc_res['out'] = out
-            rpc_res['ret'] = 0
+        self._log.debug('=== service startup message for %s', uid)

-        except Exception as e:
-            # request failed for some reason - indicate error
-            rpc_res['err'] = repr(e)
-            rpc_res['out'] = None
-            rpc_res['ret'] = 1
-            self._log.exception('control cmd failed')
+        self._service_uids_running.append(uid)
+        self._log.debug('=== service %s started (%s / %s)', uid,
+                        len(self._service_uids_running),
+                        len(self._service_uids_launched))

-        # publish the response (success or failure)
-        self.publish(rpc.CONTROL_PUBSUB, {'cmd': 'rpc_res',
-                                          'arg': rpc_res})
+        # signal main thread when all services are up
+        if len(self._service_uids_launched) == \
+           len(self._service_uids_running):
+            self._services_setup.set()

         return True

diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py
index 7e0301ab42..61c2ba0fab 100644
--- a/src/radical/pilot/session.py
+++ b/src/radical/pilot/session.py
@@ -286,8 +286,8 @@ def _init_agent_0(self):
         self._init_cfg_from_dict()
         self._start_registry()
         self._connect_registry()
-        self._start_heartbeat()
         self._connect_proxy()
+        self._start_heartbeat()
         self._publish_cfg()
         self._start_components()
         self._crosswire_proxy()

From 97761d98afadcaf7c3611776e461329a0da39dc8 Mon Sep 17 00:00:00 2001
From: Andre Merzky
Date: Fri, 4 Aug 2023 17:50:34 +0200
Subject: [PATCH 080/171] response to comments

---
 src/radical/pilot/agent/agent_0.py            |  4 ++-
 src/radical/pilot/agent/executing/popen.py    |  8 +++++-
 src/radical/pilot/configs/agent_debug_sa.json |  9 -------
 src/radical/pilot/configs/agent_default.json  | 25 ++++++++-----------
 .../pilot/tmgr/staging_input/default.py       |  2 +-
 5 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py
index 25cf24503b..6f4aabe269 100644
--- a/src/radical/pilot/agent/agent_0.py
+++ b/src/radical/pilot/agent/agent_0.py
@@ -472,7 +472,9 @@ def _start_sub_agents(self):
             if target == 'local':

                 # start agent locally
-                cmdline = '/bin/sh -l %s/bootstrap_2.sh %s' % (self._pwd, sa)
+                bs_name = '%s/bootstrap_2.sh' % (self._pwd)
+                args    = ' '.join([self._sid, self.cfg.reg_addr, sa])
+                cmdline = '/bin/sh -l %s %s' % (bs_name, args)

             else:  # target == 'node':

diff --git a/src/radical/pilot/agent/executing/popen.py b/src/radical/pilot/agent/executing/popen.py
index eac191ef1e..351080df47 100644
--- a/src/radical/pilot/agent/executing/popen.py
+++ b/src/radical/pilot/agent/executing/popen.py
@@ -548,6 +548,12 @@ def _get_rp_env(self, task):
         if sbox.startswith(self._pwd):
             sbox = '$RP_PILOT_SANDBOX%s' % sbox[len(self._pwd):]

+        gpr = td['gpus_per_rank']
+        if int(gpr) == gpr:
+            gpr = '%d' % gpr
+        else:
+            gpr = '%f' % gpr
+
         ret  = '\n'
         ret += 'export RP_TASK_ID="%s"\n'          % tid
         ret += 'export RP_TASK_NAME="%s"\n'        % name
         ret += 'export RP_TASK_SANDBOX="%s"\n'     % sbox
         ret += 'export RP_REGISTRY_ADDRESS="%s"\n' % self._session.reg_addr
         ret += 'export RP_CORES_PER_RANK=%d\n'     % td['cores_per_rank']
-        ret += 'export RP_GPUS_PER_RANK=%d\n'      % 
td['gpus_per_rank'] + ret += 'export RP_GPUS_PER_RANK=%s\n' % gpr # FIXME AM # ret += 'export RP_LFS="%s"\n' % self.lfs diff --git a/src/radical/pilot/configs/agent_debug_sa.json b/src/radical/pilot/configs/agent_debug_sa.json index e74a62fd50..dac7a4a11d 100644 --- a/src/radical/pilot/configs/agent_debug_sa.json +++ b/src/radical/pilot/configs/agent_debug_sa.json @@ -31,15 +31,6 @@ "stall_hwm" : 1, "bulk_size" : 1024}, - "funcs_wrk_queue" : { "kind" : "queue", - "log_level" : "error", - "stall_hwm" : 1, - "bulk_size" : 0}, - "funcs_res_queue" : { "kind" : "queue", - "log_level" : "error", - "stall_hwm" : 1, - "bulk_size" : 0}, - "agent_unschedule_pubsub" : { "kind" : "pubsub", "log_level" : "error", "stall_hwm" : 1, diff --git a/src/radical/pilot/configs/agent_default.json b/src/radical/pilot/configs/agent_default.json index 2c4e67dde5..5384ae4e94 100644 --- a/src/radical/pilot/configs/agent_default.json +++ b/src/radical/pilot/configs/agent_default.json @@ -23,24 +23,21 @@ # stall_hwm and batch_size is 1 (no stalling, no bulking). # "bridges" : { - "agent_staging_input_queue" : {"kind": "queue", "log_lvl":"debug"}, - "agent_scheduling_queue" : {"kind": "queue", "log_lvl":"debug"}, - "agent_executing_queue" : {"kind": "queue", "log_lvl":"debug"}, - "agent_staging_output_queue" : {"kind": "queue", "log_lvl":"debug"}, - "agent_collecting_queue" : {"kind": "queue", "log_lvl":"debug"}, + "agent_staging_input_queue" : {"kind": "queue", "log_lvl":"error"}, + "agent_scheduling_queue" : {"kind": "queue", "log_lvl":"error"}, + "agent_executing_queue" : {"kind": "queue", "log_lvl":"error"}, + "agent_staging_output_queue" : {"kind": "queue", "log_lvl":"error"}, + "agent_collecting_queue" : {"kind": "queue", "log_lvl":"error"}, - "funcs_req_queue" : {"kind": "queue", "log_lvl":"debug"}, - "funcs_res_queue" : {"kind": "queue", "log_lvl":"debug"}, + "raptor_scheduling_queue" : {"kind": "queue", "log_lvl":"error"}, - "raptor_scheduling_queue" : {"kind": "queue", "log_lvl":"debug"}, + "agent_unschedule_pubsub" : {"kind": "pubsub", "log_lvl":"error"}, + "agent_schedule_pubsub" : {"kind": "pubsub", "log_lvl":"error"}, - "agent_unschedule_pubsub" : {"kind": "pubsub", "log_lvl":"debug"}, - "agent_schedule_pubsub" : {"kind": "pubsub", "log_lvl":"debug"}, + "control_pubsub" : {"kind": "pubsub", "log_lvl":"error"}, + "state_pubsub" : {"kind": "pubsub", "log_lvl":"error"} - "control_pubsub" : {"kind": "pubsub", "log_lvl":"debug"}, - "state_pubsub" : {"kind": "pubsub", "log_lvl":"debug"} - - # "log_pubsub" : {"kind": "pubsub", "log_lvl":"debug"} + # "log_pubsub" : {"kind": "pubsub", "log_lvl":"error"} }, "components" : { diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py index 9d03fd6bed..1fbe782dab 100644 --- a/src/radical/pilot/tmgr/staging_input/default.py +++ b/src/radical/pilot/tmgr/staging_input/default.py @@ -210,7 +210,7 @@ def work(self, tasks): task_sboxes = sboxes[pid] - if False or len(task_sboxes) >= self._mkdir_threshold: + if len(task_sboxes) >= self._mkdir_threshold: self._log.debug('tar %d sboxes', len(task_sboxes)) session_sbox = self._session._get_session_sandbox(pilot) From 51250d2b09e0ba8702cac66e9fd37059656433ba Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 9 Aug 2023 12:43:26 +0200 Subject: [PATCH 081/171] iteration on tests --- bin/radical-pilot-service-signal | 56 ++++++++++++++ examples/00_getting_started.py | 6 +- tests/component_tests/test_component.py | 9 ++- tests/component_tests/test_session.py | 28 +++---- 
tests/test_raptor/test_raptor.py | 18 +++-- tests/unit_tests/test_agent_0/test_agent_0.py | 2 +- tests/unit_tests/test_raptor/test_master.py | 2 +- tests/unit_tests/test_rpc.py | 76 +++++++++++++++++++ .../test_tmgr/test_cases/task.000000.json | 1 + .../unit_tests/test_tmgr/test_tmgr_staging.py | 7 +- 10 files changed, 171 insertions(+), 34 deletions(-) create mode 100755 bin/radical-pilot-service-signal create mode 100755 tests/unit_tests/test_rpc.py diff --git a/bin/radical-pilot-service-signal b/bin/radical-pilot-service-signal new file mode 100755 index 0000000000..6a200d591f --- /dev/null +++ b/bin/radical-pilot-service-signal @@ -0,0 +1,56 @@ +#!/bin/sh + +HELP=$(cat < + + uid: UID of the service whose startup completed + + +This script is expected to be executed by a service instance which was started +by the pilot agent. The agent will block any further activity until all started +services signal theor readiness. A service specification may define a timeout +after which the startup is declaired as failed and the agent will abort. + +Internally the script will activate the agent's virtualenv and then run a small +embedded Python script which sends a message to the Agent's control channel, +informing it about the service startup. +EOT +) + +SCRIPT=$(cat < Date: Wed, 9 Aug 2023 12:57:23 +0200 Subject: [PATCH 082/171] snap --- src/radical/pilot/configs/resource_local.json | 20 +++++++++---------- src/radical/pilot/utils/misc.py | 6 ++++-- .../component_tests/test_cases/user_cfg.json | 11 +++++----- tests/unit_tests/test_pmgr.py | 8 +++----- tests/unit_tests/test_utils.py | 3 +++ 5 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/radical/pilot/configs/resource_local.json b/src/radical/pilot/configs/resource_local.json index 489cd0166c..a5330a046c 100644 --- a/src/radical/pilot/configs/resource_local.json +++ b/src/radical/pilot/configs/resource_local.json @@ -6,13 +6,13 @@ "schemas" : ["local", "ssh"], "ssh" : { - "job_manager_endpoint" : "ssh://localhost/", - "filesystem_endpoint" : "sftp://localhost/" + "job_manager_endpoint" : "ssh://localhost", + "filesystem_endpoint" : "sftp://localhost" }, "local" : { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "job_manager_endpoint" : "fork://localhost", + "filesystem_endpoint" : "file://localhost" }, "default_remote_workdir" : "$HOME", "resource_manager" : "FORK", @@ -44,8 +44,8 @@ "schemas" : ["local"], "local" : { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "job_manager_endpoint" : "fork://localhost", + "filesystem_endpoint" : "file://localhost" }, "default_remote_workdir" : "$HOME", "resource_manager" : "FORK", @@ -74,13 +74,13 @@ "schemas" : ["local", "ssh"], "ssh" : { - "job_manager_endpoint" : "ssh://localhost/", - "filesystem_endpoint" : "sftp://localhost/" + "job_manager_endpoint" : "ssh://localhost", + "filesystem_endpoint" : "sftp://localhost" }, "local" : { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "job_manager_endpoint" : "fork://localhost", + "filesystem_endpoint" : "file://localhost" }, "default_remote_workdir" : "$HOME", "resource_manager" : "FORK", diff --git a/src/radical/pilot/utils/misc.py b/src/radical/pilot/utils/misc.py index 27db783ba2..8ebbbc8900 100644 --- a/src/radical/pilot/utils/misc.py +++ b/src/radical/pilot/utils/misc.py @@ -180,10 +180,12 @@ def get_resource_fs_url(resource: str, rcfg = get_resource_config(resource) if not schema: - schema = 
rcfg['schemas'][0] + schema = rcfg['default_schema'] # return a deep copy - return ru.Url(rcfg[schema]['filesystem_endpoint']) + import pprint + pprint.pprint(rcfg.as_dict()) + return ru.Url(rcfg['schemas'][schema]['filesystem_endpoint']) # ------------------------------------------------------------------------------ diff --git a/tests/component_tests/test_cases/user_cfg.json b/tests/component_tests/test_cases/user_cfg.json index b5d1c36cd9..f8a02817cd 100644 --- a/tests/component_tests/test_cases/user_cfg.json +++ b/tests/component_tests/test_cases/user_cfg.json @@ -1,10 +1,11 @@ { "user_resource": { - "schemas" : ["local"], - "local" : - { - "job_manager_endpoint" : "fork://localhost/", - "filesystem_endpoint" : "file://localhost/" + "default_schema" : "local", + "schemas" : { + "local" : { + "job_manager_endpoint": "fork://localhost", + "filesystem_endpoint" : "file://localhost" + } }, "default_remote_workdir" : "$HOME", "resource_manager" : "FORK", diff --git a/tests/unit_tests/test_pmgr.py b/tests/unit_tests/test_pmgr.py index a9b6f65490..85a85b8cfd 100644 --- a/tests/unit_tests/test_pmgr.py +++ b/tests/unit_tests/test_pmgr.py @@ -15,9 +15,11 @@ class PMGRTestCase(TestCase): # @mock.patch.object(PilotManager, '__init__', return_value=None) @mock.patch.object(PilotManager, 'wait_pilots', return_value=None) - def test_cancel_pilots(self, mocked_wait_pilots, mocked_init): + @mock.patch.object(PilotManager, 'publish', return_value=None) + def test_cancel_pilots(self, mocked_publish, mocked_wait_pilots, mocked_init): pmgr = PilotManager(session=None) + pmgr._uid = 'pmgr.0000' pmgr._pilots_lock = mt.RLock() pmgr._log = mock.Mock() pmgr._session = mock.Mock() @@ -28,12 +30,8 @@ def test_cancel_pilots(self, mocked_wait_pilots, mocked_init): pmgr.cancel_pilots() - self.assertTrue(pmgr._session._dbs.pilot_command.called) self.assertTrue(mocked_wait_pilots.called) - args, kwargs = pmgr._session._dbs.pilot_command.call_args_list[0] - self.assertEqual('cancel_pilot', args[0]) - self.assertIn('pilot.0000', args[2]) # pilot UIDs # -------------------------------------------------------------------------- # diff --git a/tests/unit_tests/test_utils.py b/tests/unit_tests/test_utils.py index d5c6657e0d..44b83ec204 100755 --- a/tests/unit_tests/test_utils.py +++ b/tests/unit_tests/test_utils.py @@ -198,6 +198,9 @@ def test_resource_cfg(self): # test resource filesystem URL rfs_url = rpu_misc.get_resource_fs_url('local.localhost') + print('==== 1', rfs_url) + print('==== 2', str(rfs_url)) + print('==== 3', rcfg_local.local.filesystem_endpoint) self.assertIsInstance(rfs_url, ru.Url) self.assertEqual(str(rfs_url), rcfg_local.local.filesystem_endpoint) From 9ca5c3762dbaf54ade30234b7368cd15dc59fbe7 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 9 Aug 2023 14:40:27 +0200 Subject: [PATCH 083/171] test updates --- src/radical/pilot/agent/executing/popen.py | 24 +++++++------- .../pilot/agent/resource_manager/base.py | 2 ++ src/radical/pilot/pmgr/launching/base.py | 1 + src/radical/pilot/session.py | 8 +++++ src/radical/pilot/utils/misc.py | 9 ++++-- tests/unit_tests/test_agent_0/test_agent_0.py | 32 ++++++++++++------- tests/unit_tests/test_executing/test_base.py | 14 ++++---- tests/unit_tests/test_executing/test_popen.py | 5 +-- .../unit_tests/test_launcher/test_launcher.py | 5 +++ tests/unit_tests/test_pilot/test_pilot.py | 10 +++++- tests/unit_tests/test_raptor/test_master.py | 15 ++++++--- tests/unit_tests/test_rm/test_base.py | 28 +++++++++++----- .../test_cases/test_cores_gpus_map.json | 4 +++ 
.../test_scheduler/test_cases/test_base.json | 1 + tests/unit_tests/test_utils.py | 16 +++++----- 15 files changed, 120 insertions(+), 54 deletions(-) diff --git a/src/radical/pilot/agent/executing/popen.py b/src/radical/pilot/agent/executing/popen.py index 351080df47..0786025c06 100644 --- a/src/radical/pilot/agent/executing/popen.py +++ b/src/radical/pilot/agent/executing/popen.py @@ -433,8 +433,8 @@ def _check_running(self, to_watch, to_cancel): # poll subprocess object exit_code = task['proc'].poll() - to_advance = list() - to_cancel = list() + tasks_to_advance = list() + tasks_to_cancel = list() if exit_code is None: @@ -467,7 +467,7 @@ def _check_running(self, to_watch, to_cancel): self._prof.prof('task_run_cancel_stop', uid=tid) self._prof.prof('unschedule_start', uid=tid) - to_cancel.append(task) + tasks_to_cancel.append(task) else: @@ -487,7 +487,7 @@ def _check_running(self, to_watch, to_cancel): if tid in to_cancel: to_cancel.remove(tid) del task['proc'] # proc is not json serializable - to_advance.append(task) + tasks_to_advance.append(task) self._prof.prof('unschedule_start', uid=tid) @@ -504,13 +504,15 @@ def _check_running(self, to_watch, to_cancel): # stdout/stderr task['target_state'] = rps.DONE - self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, to_cancel + to_advance) - if to_cancel: - self.advance(to_cancel, rps.CANCELED, - publish=True, push=False) - if to_advance: - self.advance(to_advance, rps.AGENT_STAGING_OUTPUT_PENDING, - publish=True, push=True) + self.publish(rpc.AGENT_UNSCHEDULE_PUBSUB, + tasks_to_cancel + tasks_to_advance) + + if tasks_to_cancel: + self.advance(tasks_to_cancel, rps.CANCELED, + publish=True, push=False) + if tasks_to_advance: + self.advance(tasks_to_advance, rps.AGENT_STAGING_OUTPUT_PENDING, + publish=True, push=True) return action diff --git a/src/radical/pilot/agent/resource_manager/base.py b/src/radical/pilot/agent/resource_manager/base.py index ba93af20d1..0330794394 100644 --- a/src/radical/pilot/agent/resource_manager/base.py +++ b/src/radical/pilot/agent/resource_manager/base.py @@ -265,6 +265,8 @@ def init_from_scratch(self): n_nodes) rm_info.requested_nodes = math.ceil(n_nodes) + print('========== 1', rm_info) + assert alloc_nodes >= rm_info.requested_nodes assert alloc_nodes * rm_info.cores_per_node >= rm_info.requested_cores assert alloc_nodes * rm_info.gpus_per_node >= rm_info.requested_gpus diff --git a/src/radical/pilot/pmgr/launching/base.py b/src/radical/pilot/pmgr/launching/base.py index ef7a4b0bd0..a50a1a8c9e 100644 --- a/src/radical/pilot/pmgr/launching/base.py +++ b/src/radical/pilot/pmgr/launching/base.py @@ -657,6 +657,7 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): raise RuntimeError("'global_virtenv' is deprecated (%s)" % resource) # Create a host:port string for use by the bootstrap_0. 
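         # e.g., a (hypothetical) proxy URL 'tcp://10.0.0.1:10001' would
         # yield the hostport '10.0.0.1:10001'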
+ print('==========', agent_proxy_url) tmp = ru.Url(agent_proxy_url) if tmp.port: hostport = "%s:%d" % (tmp.host, tmp.port) diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 95fd689d80..53a3735d0a 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -191,12 +191,14 @@ def __init__(self, proxy_url: Optional[str ] = None, # initialization is different for each session type # NOTE: we could refactor this to session sub-classes + print('=========== here 5', self._uid) if self._role == self._PRIMARY: # if user did not set a uid, we need to generate a new ID if not self._uid: self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE) + print('=========== here 4', self._uid) self._init_primary() @@ -251,6 +253,7 @@ def _init_primary(self): # we still call `_init_cfg` to complete missing config settings # FIXME: completion only needed by `PRIMARY` + print('=========== here 3', self._uid) self._init_cfg_from_scratch() # primary sessions create a registry service @@ -365,6 +368,7 @@ def _connect_registry(self): # -------------------------------------------------------------------------- # def _init_cfg_from_scratch(self): + print('=========== here 2', self._uid) # A primary session will at this point have a registry client connected # to its registry service. Further, self._cfg will either be a config @@ -430,11 +434,15 @@ def _init_cfg_from_scratch(self): def_cfg.report_dir = self._cfg.path def_cfg.profile_dir = self._cfg.path + print('=========== here 1', self._uid) + self._prof = self._get_profiler(name=self._uid) self._rep = self._get_reporter(name=self._uid) self._log = self._get_logger (name=self._uid, level=self._cfg.get('debug')) + print('=================== after: %s' % self._prof) + from . import version_detail as rp_version_detail self._log.info('radical.pilot version: %s', rp_version_detail) self._log.info('radical.saga version: %s', rs.version_detail) diff --git a/src/radical/pilot/utils/misc.py b/src/radical/pilot/utils/misc.py index 8ebbbc8900..fda2571161 100644 --- a/src/radical/pilot/utils/misc.py +++ b/src/radical/pilot/utils/misc.py @@ -185,6 +185,11 @@ def get_resource_fs_url(resource: str, # return a deep copy import pprint pprint.pprint(rcfg.as_dict()) + print(schema) + print(1, schema) + print(2, rcfg['schemas'][schema]) + print(3, rcfg['schemas'][schema]['filesystem_endpoint']) + return ru.Url(rcfg['schemas'][schema]['filesystem_endpoint']) @@ -215,10 +220,10 @@ def get_resource_job_url(resource: str, rcfg = get_resource_config(resource) if not schema: - schema = rcfg['schemas'][0] + schema = rcfg['default_schema'] # return a deep copy - return ru.Url(rcfg[schema]['job_manager_endpoint']) + return ru.Url(rcfg.schemas[schema]['job_manager_endpoint']) # ------------------------------------------------------------------------------ diff --git a/tests/unit_tests/test_agent_0/test_agent_0.py b/tests/unit_tests/test_agent_0/test_agent_0.py index 5320e48849..153265247d 100755 --- a/tests/unit_tests/test_agent_0/test_agent_0.py +++ b/tests/unit_tests/test_agent_0/test_agent_0.py @@ -22,13 +22,20 @@ class TestComponent(TestCase): _cleanup_files = [] + def _init_primary_side_effect(self): + + self._log = mock.MagicMock() + self._prof = mock.MagicMock() + self._rep = mock.MagicMock() + self._reg = mock.MagicMock() + + # -------------------------------------------------------------------------- # @classmethod - @mock.patch.object(rp.Session, '_init_primary', return_value=None) - @mock.patch.object(rp.Session, '_get_logger') - 
@mock.patch.object(rp.Session, '_get_profiler') - @mock.patch.object(rp.Session, '_get_reporter') + @mock.patch.object(rp.Session, '_init_primary', + side_effect=_init_primary_side_effect, + autospec=True) def setUpClass(cls, *args, **kwargs) -> None: cls._session = rp.Session() @@ -52,7 +59,7 @@ def tearDownClass(cls) -> None: # -------------------------------------------------------------------------- # @mock.patch.object(Agent_0, '__init__', return_value=None) - def test_check_control(self, mocked_init): + def test_check_control_cb(self, mocked_init): global_control = [] @@ -66,6 +73,8 @@ def _prepenv_effect(env_id, spec): agent_cmp = Agent_0(ru.Config(), self._session) agent_cmp._log = mock.Mock() + agent_cmp._prof = mock.Mock() + agent_cmp._pid = 'pilot.0000' agent_cmp.publish = mock.MagicMock(side_effect=_publish_effect) agent_cmp._prepare_env = mock.MagicMock(side_effect=_prepenv_effect) @@ -73,21 +82,21 @@ def _prepenv_effect(env_id, spec): 'arg': {'uid': 'rpc.0000', 'rpc': 'bye'} } - self.assertTrue(agent_cmp._check_control(None, msg)) + self.assertTrue(agent_cmp._control_cb(None, msg)) self.assertEqual(global_control, []) msg = {'cmd': 'rpc_req', 'arg': {'uid': 'rpc.0001', 'rpc': 'bye'} } - self.assertTrue(agent_cmp._check_control(None, msg)) + self.assertTrue(agent_cmp._control_cb(None, msg)) self.assertEqual(global_control, []) msg = {'cmd': 'rpc_req', 'arg': {'uid': 'rpc.0002', 'rpc': 'hello'} } - self.assertTrue(agent_cmp._check_control(None, msg)) + self.assertIsNone(agent_cmp._control_cb(None, msg)) self.assertIn(global_control[0], [('control_pubsub', {'cmd': 'rpc_res', 'arg': {'uid': 'rpc.0002', @@ -108,7 +117,7 @@ def _prepenv_effect(env_id, spec): 'rpc': 'hello', 'arg': ['World']} } - self.assertTrue(agent_cmp._check_control(None, msg)) + self.assertIsNone(agent_cmp._control_cb(None, msg)) self.assertEqual(global_control[1], ('control_pubsub', {'cmd': 'rpc_res', 'arg': {'uid': 'rpc.0003', @@ -124,7 +133,7 @@ def _prepenv_effect(env_id, spec): 'env_spec': 'spec'} } } - self.assertTrue(agent_cmp._check_control(None, msg)) + self.assertIsNone(agent_cmp._control_cb(None, msg)) self.assertEqual(global_control[2], ('control_pubsub', {'cmd': 'rpc_res', 'arg': {'uid': 'rpc.0004', @@ -146,6 +155,7 @@ def test_start_sub_agents(self, mocked_run_sh_callout, mocked_ru_env_prep, agent_0 = Agent_0(ru.Config(), self._session) agent_0._pwd = tempfile.gettempdir() agent_0._log = mock.Mock() + agent_0._sid = 'rp.session.0' agent_0._cfg = ru.Config(from_dict={ 'agents': { 'agent_1': {'target' : 'node', @@ -284,7 +294,7 @@ def test_service_state_cb(self, mocked_init): if __name__ == '__main__': tc = TestComponent() - tc.test_check_control() + tc.test_check_control_cb() tc.test_start_sub_agents() tc.test_start_services() tc.test_service_state_cb() diff --git a/tests/unit_tests/test_executing/test_base.py b/tests/unit_tests/test_executing/test_base.py index 380aa3dd34..14c92817c6 100755 --- a/tests/unit_tests/test_executing/test_base.py +++ b/tests/unit_tests/test_executing/test_base.py @@ -43,7 +43,7 @@ def work(self, tasks): for spawner in spawners: session = ru.Config(cfg={ - 'cfg': { 'resource_cfg': { 'agent_spawner' : spawner}}}) + '_rcfg': { 'agent_spawner' : spawner}}) try: AgentExecutingComponent.create(cfg=spawner, session=session) except: @@ -72,12 +72,12 @@ def test_initialize(self, mocked_rm, mocked_init): 'launch_methods': {'SRUN': {}}} }) ec._reg = ru.Config(cfg={ - 'cfg': {'resource' : 'localhost', - 'pilot_sandbox' : '', - 'session_sandbox' : '', - 'resource_sandbox': '', - 
'resource_cfg' : {'resource_manager': 'FORK', - 'agent_spawner' : 'POPEN'}}}) + 'cfg' : {'resource' : 'localhost', + 'pilot_sandbox' : '', + 'session_sandbox' : '', + 'resource_sandbox': ''}, + 'rcfg': {'resource_manager': 'FORK', + 'agent_spawner' : 'POPEN'}}) ec._log = ec._prof = mock.Mock() ec.work = ec.control_cb = mock.Mock() ec.register_input = ec.register_output = mock.Mock() diff --git a/tests/unit_tests/test_executing/test_popen.py b/tests/unit_tests/test_executing/test_popen.py index cf3898b199..8fac46322e 100755 --- a/tests/unit_tests/test_executing/test_popen.py +++ b/tests/unit_tests/test_executing/test_popen.py @@ -81,7 +81,8 @@ def test_handle_task(self, mocked_sp_popen, mocked_lm_init, pex._log = pex._prof = pex._watch_queue = mock.Mock() pex._log._debug_level = 1 - pex._cfg = {'resource_cfg': {'new_session_per_task': False}} + pex._reg = ru.Config(from_dict={'rcfg.new_session_per_task': False}) + pex._cfg = dict() pex._pwd = '' pex._pid = 'pilot.0000' pex.sid = 'session.0000' @@ -99,7 +100,6 @@ def test_handle_task(self, mocked_sp_popen, mocked_lm_init, pex._handle_task(task) popen_input_kwargs = mocked_sp_popen.call_args_list[0][1] - print(popen_input_kwargs) self.assertFalse(popen_input_kwargs['start_new_session']) for prefix in ['.launch.sh', '.exec.sh']: @@ -184,6 +184,7 @@ def test_check_running(self, mocked_killpg, mocked_init): to_watch.append(task) to_cancel.append(task['uid']) pex._check_running(to_watch, to_cancel) + print('to_cancel:', to_cancel) self.assertFalse(to_cancel) # case 2: exit_code == 0 diff --git a/tests/unit_tests/test_launcher/test_launcher.py b/tests/unit_tests/test_launcher/test_launcher.py index c7b416a118..f6c8276cfb 100755 --- a/tests/unit_tests/test_launcher/test_launcher.py +++ b/tests/unit_tests/test_launcher/test_launcher.py @@ -46,6 +46,11 @@ def _get_client_sandbox(self): cls._session = Session() cls._configs = ru.Config('radical.pilot.resource', name='*') + for site in cls._configs: + for k,v in cls._configs[site].items(): + v['agent_proxy_url'] = 'tcp://localhost:1024' + + # -------------------------------------------------------------------------- # @mock.patch.object(PMGRLaunchingComponent, '__init__', return_value=None) diff --git a/tests/unit_tests/test_pilot/test_pilot.py b/tests/unit_tests/test_pilot/test_pilot.py index 776e9fdf07..f5c8a81f57 100755 --- a/tests/unit_tests/test_pilot/test_pilot.py +++ b/tests/unit_tests/test_pilot/test_pilot.py @@ -5,6 +5,7 @@ from unittest import mock from unittest import TestCase +import radical.utils as ru import radical.pilot as rp @@ -20,7 +21,14 @@ def test_pilot_uid(self, mocked_init): pmgr._uid = 'pmgr.0000' pmgr._log = mock.Mock() pmgr._prof = mock.Mock() - pmgr._session = mock.Mock() + pmgr._session = ru.Config(from_dict={'_reg': { + 'bridges.control_pubsub.addr_sub': 'tcp://localhost', + 'bridges.control_pubsub.addr_pub': 'tcp://localhost'}}) + + ru.zmq.Subscriber = mock.Mock() + ru.zmq.Publisher = mock.Mock() + ru.zmq.test_pubsub = mock.Mock() + pmgr._session.uid = str(time.time()) # restart uid counter sandbox_url = mock.Mock() sandbox_url.path = './' diff --git a/tests/unit_tests/test_raptor/test_master.py b/tests/unit_tests/test_raptor/test_master.py index f9bd838f4e..d1d133ceb2 100644 --- a/tests/unit_tests/test_raptor/test_master.py +++ b/tests/unit_tests/test_raptor/test_master.py @@ -21,13 +21,20 @@ class RaptorMasterTC(TestCase): _cleanup_files = [] + def _init_primary_side_effect(self): + + self._log = mock.MagicMock() + self._prof = mock.MagicMock() + self._rep = 
mock.MagicMock() + self._reg = mock.MagicMock() + + # -------------------------------------------------------------------------- # @classmethod - @mock.patch.object(rp.Session, '_init_primary', return_value=None) - @mock.patch.object(rp.Session, '_get_logger') - @mock.patch.object(rp.Session, '_get_profiler') - @mock.patch.object(rp.Session, '_get_reporter') + @mock.patch.object(rp.Session, '_init_primary', + side_effect=_init_primary_side_effect, + autospec=True) def setUpClass(cls, *args, **kwargs) -> None: cls._session = rp.Session() diff --git a/tests/unit_tests/test_rm/test_base.py b/tests/unit_tests/test_rm/test_base.py index 5da1e1408c..5eeb3fd0c9 100755 --- a/tests/unit_tests/test_rm/test_base.py +++ b/tests/unit_tests/test_rm/test_base.py @@ -45,6 +45,7 @@ def test_init_from_registry(self, mocked_prof, mocked_log, mocked_lm): c.close() rm = ResourceManager(cfg=ru.TypedDict({'reg_addr': reg.addr}), + rcfg=ru.TypedDict(), log=mock.Mock(), prof=mock.Mock()) self.assertIsInstance(rm.info, RMInfo) @@ -71,13 +72,15 @@ def test_init_from_scratch(self, mocked_init): 'lfs_size_per_node': 100, 'resource_cfg' : {}}) - rm = ResourceManager(cfg=None, log=None, prof=None) + rm = ResourceManager(cfg=None, rcfg=None, log=None, prof=None) + rm._rcfg = cfg rm._cfg = cfg rm._log = mock.Mock() rm._prof = mock.Mock() def _init_from_scratch(rm_info): rm_info.node_list = rm._get_node_list([('node00', 16)], rm_info) + rm_info.cores_per_node = rm_info['cores_per_node'] return rm_info # RM specific method (to update node_list and cores_per_node if needed) @@ -117,13 +120,21 @@ def test_cores_cpus_map(self, mocked_init): tc_map['result']): def _init_from_scratch(rm_info_tc, rm_info_input): - _rm_info = ru.TypedDict(rm_info_tc) - _rm_info.update(rm_info_input) + + _rm_info = ru.TypedDict(rm_info_input) + _rm_info.update(rm_info_tc) + + # FIXME: why is this not picked up from the test cases? 
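The FIXME above seems to be about merge precedence: with `update()`, the mapping applied last wins for overlapping keys, so whether the test-case values survive depends purely on construction order. Reduced to plain dicts (hypothetical values):

    base     = {'cores_per_node': 8}        # e.g. values a test case provides
    override = {'cores_per_node': 16}       # e.g. values computed at runtime

    a = dict(base);     a.update(override)  # runtime values win
    b = dict(override); b.update(base)      # test-case values win
    print(a['cores_per_node'], b['cores_per_node'])   # 16 8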
+ _rm_info.cores_per_node = 8 + _rm_info.requested_cores = 8 + _rm_info.gpus_per_node = 2 + _rm_info.requested_gpus = 2 return _rm_info from functools import partial - rm._cfg = ru.TypedDict(rm_cfg) + rm._cfg = ru.TypedDict({'nodes': 1}) + rm._rcfg = ru.TypedDict(rm_cfg) rm._init_from_scratch = partial(_init_from_scratch, rm_info) if result == 'AssertionError': @@ -139,7 +150,7 @@ def _init_from_scratch(rm_info_tc, rm_info_input): @mock.patch.object(ResourceManager, '__init__', return_value=None) def test_set_info(self, mocked_init): - rm = ResourceManager(cfg=None, log=None, prof=None) + rm = ResourceManager(cfg=None, rcfg=None, log=None, prof=None) with self.assertRaises(KeyError): # required attributes are missed @@ -171,7 +182,8 @@ def test_find_launcher(self, mocked_lm, mocked_init): cfg = ru.TypedDict({ 'cores' : 16, 'gpus' : 2, - 'resource_cfg' : { + }) + rcfg = ru.TypedDict({ 'cores_per_node' : 16, 'gpus_per_node' : 2, 'lfs_path_per_node': '${LOCAL}', @@ -179,9 +191,9 @@ def test_find_launcher(self, mocked_lm, mocked_init): 'launch_methods' : { 'order': ['SRUN'], 'SRUN' : {} - }}}) + }}) - rm = ResourceManager.create('FORK', cfg, None, None) + rm = ResourceManager.create('FORK', cfg, rcfg, None, None) rm._launch_order = ['SRUN'] rm._launchers = {'SRUN': mocked_lm} diff --git a/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json b/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json index c710e43520..f68fc93bee 100644 --- a/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json +++ b/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json @@ -11,6 +11,7 @@ "mem" : 128 } ], + "cores_per_node" : 8, "agent_node_list" : [], "service_node_list" : [] }, @@ -25,6 +26,7 @@ "mem" : 128 } ], + "cores_per_node" : 8, "agent_node_list" : [], "service_node_list" : [] }, @@ -39,6 +41,7 @@ "mem" : 128 } ], + "cores_per_node" : 8, "agent_node_list" : [], "service_node_list" : [] }, @@ -53,6 +56,7 @@ "mem" : 128 } ], + "cores_per_node" : 8, "agent_node_list" : [], "service_node_list" : [] } diff --git a/tests/unit_tests/test_scheduler/test_cases/test_base.json b/tests/unit_tests/test_scheduler/test_cases/test_base.json index a5837d3399..59071a4907 100644 --- a/tests/unit_tests/test_scheduler/test_cases/test_base.json +++ b/tests/unit_tests/test_scheduler/test_cases/test_base.json @@ -35,6 +35,7 @@ "mem_per_node" : 0, "requested_nodes": 1, "requested_cores": 8, + "cores_per_node" : 8, "requested_gpus" : 0 } }, diff --git a/tests/unit_tests/test_utils.py b/tests/unit_tests/test_utils.py index 44b83ec204..d01d6d8ad5 100755 --- a/tests/unit_tests/test_utils.py +++ b/tests/unit_tests/test_utils.py @@ -200,32 +200,32 @@ def test_resource_cfg(self): rfs_url = rpu_misc.get_resource_fs_url('local.localhost') print('==== 1', rfs_url) print('==== 2', str(rfs_url)) - print('==== 3', rcfg_local.local.filesystem_endpoint) + print('==== 3', rcfg_local.schemas.local.filesystem_endpoint) self.assertIsInstance(rfs_url, ru.Url) - self.assertEqual(str(rfs_url), rcfg_local.local.filesystem_endpoint) + self.assertEqual(str(rfs_url), rcfg_local.schemas.local.filesystem_endpoint) # switched default access schema, which is the first in the list - rpu_misc._rcfgs.local.localhost.schemas = ['ssh', 'local'] + rpu_misc._rcfgs.local.localhost.default_schema = 'ssh' rfs_url = rpu_misc.get_resource_fs_url('local.localhost') - self.assertEqual(str(rfs_url), rcfg_local.ssh.filesystem_endpoint) + self.assertEqual(str(rfs_url), rcfg_local.schemas.ssh.filesystem_endpoint) rfs_url = 
rpu_misc.get_resource_fs_url(resource='access.bridges2', schema='gsissh') self.assertEqual(str(rfs_url), - rcfgs.access.bridges2.gsissh.filesystem_endpoint) + rcfgs.access.bridges2.schemas.gsissh.filesystem_endpoint) # test resource job URL rj_url = rpu_misc.get_resource_job_url('local.localhost') self.assertIsInstance(rj_url, ru.Url) - schema_default = rpu_misc._rcfgs.local.localhost.schemas[0] + schema_default = rpu_misc._rcfgs.local.localhost.default_schema self.assertEqual(str(rj_url), - rcfg_local[schema_default].job_manager_endpoint) + rcfg_local.schemas[schema_default].job_manager_endpoint) rj_url = rpu_misc.get_resource_job_url(resource='access.bridges2', schema='gsissh') self.assertEqual(str(rj_url), - rcfgs.access.bridges2.gsissh.job_manager_endpoint) + rcfgs.access.bridges2.schemas.gsissh.job_manager_endpoint) # ------------------------------------------------------------------------------ From 1608886871c746bd3da9dc49ddb5bf1a12ca3617 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Thu, 10 Aug 2023 19:23:03 +0200 Subject: [PATCH 084/171] test fixes --- .../pilot/agent/resource_manager/base.py | 8 ++++- src/radical/pilot/pmgr/launching/base.py | 1 - src/radical/pilot/session.py | 34 +++++++++++-------- src/radical/pilot/utils/misc.py | 7 ---- .../task_overlay_worker/drive_worker.py | 6 ++-- tests/component_tests/test_session.py | 24 +++++++------ tests/integration_tests/test_agent_bridge.py | 8 ++--- tests/integration_tests/test_lm/test_jsrun.py | 1 - tests/unit_tests/test_executing/test_popen.py | 1 - tests/unit_tests/test_pytask.py | 2 -- tests/unit_tests/test_raptor/test_worker.py | 1 - tests/unit_tests/test_rm/test_base.py | 7 +--- .../test_cases/test_cores_gpus_map.json | 12 +++++++ tests/unit_tests/test_rm/test_fork.py | 6 ++-- tests/unit_tests/test_scheduler/test_base.py | 3 +- .../test_scheduler/test_cases/test_base.json | 2 +- tests/unit_tests/test_tmgr/test_tmgr.py | 1 - tests/unit_tests/test_utils.py | 3 -- 18 files changed, 67 insertions(+), 60 deletions(-) diff --git a/src/radical/pilot/agent/resource_manager/base.py b/src/radical/pilot/agent/resource_manager/base.py index 0330794394..c12d7e167a 100644 --- a/src/radical/pilot/agent/resource_manager/base.py +++ b/src/radical/pilot/agent/resource_manager/base.py @@ -265,7 +265,11 @@ def init_from_scratch(self): n_nodes) rm_info.requested_nodes = math.ceil(n_nodes) - print('========== 1', rm_info) + print('==== alloc_nodes: %s' % alloc_nodes) + + import pprint + pprint.pprint(rm_info.as_dict()) + assert alloc_nodes >= rm_info.requested_nodes assert alloc_nodes * rm_info.cores_per_node >= rm_info.requested_cores @@ -323,6 +327,8 @@ def _prepare_launch_methods(self, rm_info): launch_methods = self._rm_info.launch_methods self._launchers = {} + import pprint + pprint.pprint(rm_info.as_dict()) self._launch_order = launch_methods.get('order') or list(launch_methods) for lm_name in list(self._launch_order): diff --git a/src/radical/pilot/pmgr/launching/base.py b/src/radical/pilot/pmgr/launching/base.py index a50a1a8c9e..ef7a4b0bd0 100644 --- a/src/radical/pilot/pmgr/launching/base.py +++ b/src/radical/pilot/pmgr/launching/base.py @@ -657,7 +657,6 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): raise RuntimeError("'global_virtenv' is deprecated (%s)" % resource) # Create a host:port string for use by the bootstrap_0. 
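To make the comment above concrete, here is the host:port formatting it refers to, using the same `ru.Url` accessors as the hunk below; the example address is the one the launcher tests in this series assign to `agent_proxy_url`:

    import radical.utils as ru

    url = ru.Url('tcp://localhost:1024')
    if url.port:
        hostport = '%s:%d' % (url.host, url.port)
        print(hostport)                     # localhost:1024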
- print('==========', agent_proxy_url) tmp = ru.Url(agent_proxy_url) if tmp.port: hostport = "%s:%d" % (tmp.host, tmp.port) diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 95fd689d80..53a3735d0a 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -191,14 +191,12 @@ def __init__(self, proxy_url: Optional[str ] = None, # initialization is different for each session type # NOTE: we could refactor this to session sub-classes - print('=========== here 5', self._uid) if self._role == self._PRIMARY: # if user did not set a uid, we need to generate a new ID if not self._uid: self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE) - print('=========== here 4', self._uid) self._init_primary() @@ -253,7 +251,6 @@ def _init_primary(self): # we still call `_init_cfg` to complete missing config settings # FIXME: completion only needed by `PRIMARY` - print('=========== here 3', self._uid) self._init_cfg_from_scratch() # primary sessions create a registry service @@ -368,7 +365,6 @@ def _connect_registry(self): # -------------------------------------------------------------------------- # def _init_cfg_from_scratch(self): - print('=========== here 2', self._uid) # A primary session will at this point have a registry client connected # to its registry service. Further, self._cfg will either be a config @@ -434,15 +430,11 @@ def _init_cfg_from_scratch(self): def_cfg.report_dir = self._cfg.path def_cfg.profile_dir = self._cfg.path - print('=========== here 1', self._uid) - self._prof = self._get_profiler(name=self._uid) self._rep = self._get_reporter(name=self._uid) self._log = self._get_logger (name=self._uid, level=self._cfg.get('debug')) - print('=================== after: %s' % self._prof) - from . import version_detail as rp_version_detail self._log.info('radical.pilot version: %s', rp_version_detail) self._log.info('radical.saga version: %s', rs.version_detail) @@ -1217,24 +1209,36 @@ def get_resource_config(self, resource, schema=None): resource_cfg = copy.deepcopy(self._rcfgs[domain][host]) - if not schema: - if 'schemas' in resource_cfg: - schema = resource_cfg['schemas'][0] + if not schema: + schema = resource_cfg.get('default_schema') + + cnt = 0 + while schema: - if schema: - if schema not in resource_cfg: + if schema not in resource_cfg['schemas']: raise RuntimeError("schema %s unknown for resource %s" % (schema, resource)) - for key in resource_cfg[schema]: + val = resource_cfg['schemas'][schema] + + if isinstance(val, str): + schema = val + cnt += 1 + if cnt > 10: + break + continue + + for key in val: # merge schema specific resource keys into the # resource config - resource_cfg[key] = resource_cfg[schema][key] + resource_cfg[key] = val[key] + + break resource_cfg.label = resource return resource_cfg + # # -------------------------------------------------------------------------- # # # def fetch_json(self, tgt=None): diff --git a/src/radical/pilot/utils/misc.py b/src/radical/pilot/utils/misc.py index fda2571161..45d5222a44 100644 --- a/src/radical/pilot/utils/misc.py +++ b/src/radical/pilot/utils/misc.py @@ -183,13 +183,6 @@ def get_resource_fs_url(resource: str, schema = rcfg['default_schema'] # return a deep copy - import pprint - pprint.pprint(rcfg.as_dict()) - print(schema) - print(1, schema) - print(2, rcfg['schemas'][schema]) - print(3, rcfg['schemas'][schema]['filesystem_endpoint']) - return ru.Url(rcfg['schemas'][schema]['filesystem_endpoint']) diff --git a/tests/component_tests/task_overlay_worker/drive_worker.py
b/tests/component_tests/task_overlay_worker/drive_worker.py index 23ca9caf30..92efdf8bb3 100755 --- a/tests/component_tests/task_overlay_worker/drive_worker.py +++ b/tests/component_tests/task_overlay_worker/drive_worker.py @@ -29,9 +29,9 @@ } for i in range(n)] ]) - for i in range(n): - for res in q_out.get(): - print('%s: %s' % (res['req'], res['out'])) + # for i in range(n): + # for res in q_out.get(): + # print('%s: %s' % (res['req'], res['out'])) # ------------------------------------------------------------------------------ diff --git a/tests/component_tests/test_session.py b/tests/component_tests/test_session.py index 49863898ce..c9260eda02 100755 --- a/tests/component_tests/test_session.py +++ b/tests/component_tests/test_session.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + # pylint: disable=protected-access, unused-argument, no-value-for-parameter __copyright__ = 'Copyright 2020-2022, The RADICAL-Cybertools Team' @@ -38,7 +40,6 @@ def se_init(self): tgt = rcfg[schema] rcfg[schema] = rcfg[tgt] - print('====', self._rcfgs.keys()) # -------------------------------------------------------------------------- # @@ -53,6 +54,7 @@ def setUpClass(cls, *args, **kwargs) -> None: cls._session = Session() cls._cleanup_files.append(cls._session.uid) + # -------------------------------------------------------------------------- # @classmethod @@ -87,12 +89,14 @@ def test_get_resource_config(self): # schemas are ["ssh", "gsissh"] rcfg = self._session.get_resource_config(rcfg_label) + + default_schema = rcfg.default_schema self.assertEqual(rcfg.job_manager_endpoint, - rcfg[rcfg.schemas[0]].job_manager_endpoint) + rcfg.schemas[default_schema].job_manager_endpoint) new_schema = 'gsissh' rcfg = self._session.get_resource_config(rcfg_label, schema=new_schema) self.assertEqual(rcfg.job_manager_endpoint, - rcfg[new_schema].job_manager_endpoint) + rcfg.schemas[new_schema].job_manager_endpoint) # check exceptions @@ -119,12 +123,12 @@ def test_resource_schema_alias(self, mocked_config, *args, **kwargs): mocked_config.return_value = ru.TypedDict({ 'local': { 'test': { - 'schemas' : ['schema_origin', - 'schema_alias', - 'schema_alias_alias'], - 'schema_origin' : {'param_0': 'value_0'}, - 'schema_alias' : 'schema_origin', - 'schema_alias_alias': 'schema_alias' + 'default_schema' :'schema_origin', + 'schemas' : { + 'schema_origin' : {'param_0': 'value_0'}, + 'schema_alias' : 'schema_origin', + 'schema_alias_alias': 'schema_alias' + } } } }) @@ -202,7 +206,6 @@ def test_get_resource_sandbox(self): self._session._cache['resource_sandbox'] = {} # NCSA: split `project` by "-" - print('====', self._session._rcfgs.keys()) pilot['description'].update({'resource': 'ncsa.delta', 'project' : 'bbka-delta-cpu'}) self.assertIn('/bbka/', @@ -222,6 +225,7 @@ def test_get_resource_sandbox(self): if __name__ == '__main__': tc = TestSession() + tc.setUpClass() tc.test_list_resources() tc.test_get_resource_config() tc.test_resource_schema_alias() diff --git a/tests/integration_tests/test_agent_bridge.py b/tests/integration_tests/test_agent_bridge.py index 7dd5f76e84..2c88bb9e7e 100755 --- a/tests/integration_tests/test_agent_bridge.py +++ b/tests/integration_tests/test_agent_bridge.py @@ -10,10 +10,10 @@ # def test_agent_bridge(url=None): - if url: - bridge = ru.zmq.Client(url=url) - else: - bridge = ru.zmq.Client(server='server.0000') + if not url: + return + + bridge = ru.zmq.Client(url=url) sid = 'foo' diff --git a/tests/integration_tests/test_lm/test_jsrun.py b/tests/integration_tests/test_lm/test_jsrun.py index 
d3779f2c9e..ecaceeaf63 100755 --- a/tests/integration_tests/test_lm/test_jsrun.py +++ b/tests/integration_tests/test_lm/test_jsrun.py @@ -72,7 +72,6 @@ def test_command(self, mocked_init): result = test_case['result'] for i in range(len(result)): if '{node}' in result[i]: - print(result[i]) result[i] = result[i].format(node=self.node_name) log = mock.Mock() diff --git a/tests/unit_tests/test_executing/test_popen.py b/tests/unit_tests/test_executing/test_popen.py index 8fac46322e..39c7718ce0 100755 --- a/tests/unit_tests/test_executing/test_popen.py +++ b/tests/unit_tests/test_executing/test_popen.py @@ -184,7 +184,6 @@ def test_check_running(self, mocked_killpg, mocked_init): to_watch.append(task) to_cancel.append(task['uid']) pex._check_running(to_watch, to_cancel) - print('to_cancel:', to_cancel) self.assertFalse(to_cancel) # case 2: exit_code == 0 diff --git a/tests/unit_tests/test_pytask.py b/tests/unit_tests/test_pytask.py index 39761c4117..18b3ba569d 100644 --- a/tests/unit_tests/test_pytask.py +++ b/tests/unit_tests/test_pytask.py @@ -24,8 +24,6 @@ def AB(z): return 2 * z wrapped_function = partial(AA, AB) - print(type(wrapped_function)) - print(callable(wrapped_function)) pytask_class_obj = PythonTask(wrapped_function) self.assertIsInstance(pytask_class_obj, str) diff --git a/tests/unit_tests/test_raptor/test_worker.py b/tests/unit_tests/test_raptor/test_worker.py index 8e595fc488..778bcbc59a 100755 --- a/tests/unit_tests/test_raptor/test_worker.py +++ b/tests/unit_tests/test_raptor/test_worker.py @@ -94,7 +94,6 @@ def test_exec(self, mocked_init, mocked_Logger): # component._log = mocked_Logger # data = {'code': '2 + 5'} # out, err, ret, val = component._exec(data) - # print('===', [out, err, ret, val]) # # self.assertEqual(ret, 0) # self.assertEqual(val, {7}) diff --git a/tests/unit_tests/test_rm/test_base.py b/tests/unit_tests/test_rm/test_base.py index 5eeb3fd0c9..740e7430f8 100755 --- a/tests/unit_tests/test_rm/test_base.py +++ b/tests/unit_tests/test_rm/test_base.py @@ -124,16 +124,11 @@ def _init_from_scratch(rm_info_tc, rm_info_input): _rm_info = ru.TypedDict(rm_info_input) _rm_info.update(rm_info_tc) - # FIXME: why is this not picked up from the test cases? 
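For reference, the `get_resource_config` rework earlier in this patch lets an entry under `schemas` be a string that aliases another schema, and follows the chain (with a hop guard) before merging the endpoint keys. A standalone sketch of that resolution, with hypothetical data shaped like the alias test above:

    schemas = {'origin'     : {'filesystem_endpoint': 'file://localhost/'},
               'alias'      : 'origin',
               'alias_alias': 'alias'}

    def resolve(schemas, name, max_hops=10):
        for _ in range(max_hops):
            entry = schemas[name]
            if not isinstance(entry, str):
                return entry            # a real schema dict: chain resolved
            name = entry                # a string entry aliases another schema
        raise RuntimeError('alias chain too long for schema %s' % name)

    print(resolve(schemas, 'alias_alias'))   # {'filesystem_endpoint': 'file://localhost/'}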
- _rm_info.cores_per_node = 8 - _rm_info.requested_cores = 8 - _rm_info.gpus_per_node = 2 - _rm_info.requested_gpus = 2 return _rm_info from functools import partial - rm._cfg = ru.TypedDict({'nodes': 1}) + rm._cfg = ru.TypedDict(rm_cfg['resource_cfg']) rm._rcfg = ru.TypedDict(rm_cfg) rm._init_from_scratch = partial(_init_from_scratch, rm_info) diff --git a/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json b/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json index f68fc93bee..298e63a5f1 100644 --- a/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json +++ b/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json @@ -11,7 +11,10 @@ "mem" : 128 } ], + "requested_cores" : 8, + "requested_gpus" : 2, "cores_per_node" : 8, + "gpus_per_node" : 2, "agent_node_list" : [], "service_node_list" : [] }, @@ -26,7 +29,10 @@ "mem" : 128 } ], + "requested_cores" : 8, + "requested_gpus" : 2, "cores_per_node" : 8, + "gpus_per_node" : 2, "agent_node_list" : [], "service_node_list" : [] }, @@ -41,7 +47,10 @@ "mem" : 128 } ], + "requested_cores" : 8, + "requested_gpus" : 2, "cores_per_node" : 8, + "gpus_per_node" : 2, "agent_node_list" : [], "service_node_list" : [] }, @@ -56,7 +65,10 @@ "mem" : 128 } ], + "requested_cores" : 8, + "requested_gpus" : 2, "cores_per_node" : 8, + "gpus_per_node" : 2, "agent_node_list" : [], "service_node_list" : [] } diff --git a/tests/unit_tests/test_rm/test_fork.py b/tests/unit_tests/test_rm/test_fork.py index 7385305401..7d31c1ded9 100755 --- a/tests/unit_tests/test_rm/test_fork.py +++ b/tests/unit_tests/test_rm/test_fork.py @@ -26,7 +26,8 @@ def test_init_from_scratch(self, mocked_logger, mocked_mp_cpu_count, mocked_init): rm_fork = Fork(cfg=None, log=None, prof=None) - rm_fork._cfg = ru.TypedDict({'resource_cfg': {}}) + rm_fork._cfg = ru.TypedDict({'resource_cfg': {}}) + rm_fork._rcfg = ru.TypedDict() rm_fork._log = mocked_logger rm_fork._cfg.resource_cfg.fake_resources = False @@ -58,10 +59,11 @@ def test_init_from_scratch(self, mocked_logger, mocked_mp_cpu_count, rm_fork._init_from_scratch(rm_info) # fake/virtual resource, request more cores than available/detected - rm_fork._cfg.resource_cfg.fake_resources = True + rm_fork._rcfg.fake_resources = True rm_info.requested_nodes = 0 # will be calculated during init rm_info.requested_cores = mocked_mp_cpu_count() * 10 + rm_info = rm_fork._init_from_scratch(rm_info) self.assertGreater(rm_info.requested_cores, mocked_mp_cpu_count()) self.assertGreater(rm_info.requested_nodes, 1) diff --git a/tests/unit_tests/test_scheduler/test_base.py b/tests/unit_tests/test_scheduler/test_base.py index e2ef26bc08..4eff012631 100755 --- a/tests/unit_tests/test_scheduler/test_base.py +++ b/tests/unit_tests/test_scheduler/test_base.py @@ -63,7 +63,8 @@ def _mock_get(_c, name): mock_get = partial(_mock_get, c) sched._cfg = ru.Config(from_dict={'reg_addr': 'addr'}) - sched._reg = ru.Config(from_dict=c['config']) + sched._reg = ru.Config(from_dict={'cfg': c['config'], + 'rcfg': c['config']['resource_cfg']}) with mock.patch.object(ru.zmq.RegistryClient, 'get', mock_get): if 'RuntimeError' in c['result']: diff --git a/tests/unit_tests/test_scheduler/test_cases/test_base.json b/tests/unit_tests/test_scheduler/test_cases/test_base.json index 59071a4907..81b7f8772a 100644 --- a/tests/unit_tests/test_scheduler/test_cases/test_base.json +++ b/tests/unit_tests/test_scheduler/test_cases/test_base.json @@ -4,8 +4,8 @@ { "config": { "pid" : "pid.0003", - "resource_manager": "FORK", "resource_cfg" : { + "resource_manager": "FORK", 
"launch_methods" : { "FORK": {} } diff --git a/tests/unit_tests/test_tmgr/test_tmgr.py b/tests/unit_tests/test_tmgr/test_tmgr.py index 4aeca8327e..1a61c6e095 100755 --- a/tests/unit_tests/test_tmgr/test_tmgr.py +++ b/tests/unit_tests/test_tmgr/test_tmgr.py @@ -46,7 +46,6 @@ def test_add_pilots(self, mocked_logger, mocked_init): global_pilots = [] def publish_side_effect(rpc, pilot): - print(type(pilot), pilot) nonlocal global_pilots global_pilots.append(pilot) diff --git a/tests/unit_tests/test_utils.py b/tests/unit_tests/test_utils.py index d01d6d8ad5..c084fc000c 100755 --- a/tests/unit_tests/test_utils.py +++ b/tests/unit_tests/test_utils.py @@ -198,9 +198,6 @@ def test_resource_cfg(self): # test resource filesystem URL rfs_url = rpu_misc.get_resource_fs_url('local.localhost') - print('==== 1', rfs_url) - print('==== 2', str(rfs_url)) - print('==== 3', rcfg_local.schemas.local.filesystem_endpoint) self.assertIsInstance(rfs_url, ru.Url) self.assertEqual(str(rfs_url), rcfg_local.schemas.local.filesystem_endpoint) From c752188981007e57105084f99d2fb97a61875e16 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Thu, 10 Aug 2023 21:40:44 +0200 Subject: [PATCH 085/171] space --- tests/component_tests/test_component.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/component_tests/test_component.py b/tests/component_tests/test_component.py index ee52a1418f..c88ebf76b3 100755 --- a/tests/component_tests/test_component.py +++ b/tests/component_tests/test_component.py @@ -46,7 +46,6 @@ def test_cm_start_components(self, mocked_sh_callout, mocked_init): # FIXME: heartbeats use the sessions HB channel which we don't have return - cfg = { 'path' : '/tmp', 'heartbeat' : {'timeout': 10}, From 84baf1ef673d0c678434ad826dcf15e8b6d852e3 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Thu, 10 Aug 2023 19:27:32 -0400 Subject: [PATCH 086/171] fixed RM related tests --- .../pilot/agent/resource_manager/base.py | 12 +----- src/radical/pilot/utils/misc.py | 4 +- tests/unit_tests/test_rm/test_base.py | 15 +++---- .../test_cases/test_cores_gpus_map.json | 42 +++++++++---------- 4 files changed, 31 insertions(+), 42 deletions(-) diff --git a/src/radical/pilot/agent/resource_manager/base.py b/src/radical/pilot/agent/resource_manager/base.py index c12d7e167a..c97e241dec 100644 --- a/src/radical/pilot/agent/resource_manager/base.py +++ b/src/radical/pilot/agent/resource_manager/base.py @@ -220,8 +220,8 @@ def init_from_scratch(self): rm_info.threads_per_gpu = 1 rm_info.mem_per_gpu = None - rm_info.mem_per_node = self._cfg.mem_per_node or 0 - system_architecture = self._cfg.get('system_architecture', {}) + rm_info.mem_per_node = self._rcfg.mem_per_node or 0 + system_architecture = self._rcfg.get('system_architecture', {}) rm_info.threads_per_core = int(os.environ.get('RADICAL_SMT') or system_architecture.get('smt', 1)) @@ -265,12 +265,6 @@ def init_from_scratch(self): n_nodes) rm_info.requested_nodes = math.ceil(n_nodes) - print('==== alloc_nodes: %s' % alloc_nodes) - - import pprint - pprint.pprint(rm_info.as_dict()) - - assert alloc_nodes >= rm_info.requested_nodes assert alloc_nodes * rm_info.cores_per_node >= rm_info.requested_cores assert alloc_nodes * rm_info.gpus_per_node >= rm_info.requested_gpus @@ -327,8 +321,6 @@ def _prepare_launch_methods(self, rm_info): launch_methods = self._rm_info.launch_methods self._launchers = {} - import pprint - pprint.pprint(rm_info.as_dict()) self._launch_order = launch_methods.get('order') or list(launch_methods) for lm_name in list(self._launch_order): diff --git 
a/src/radical/pilot/utils/misc.py b/src/radical/pilot/utils/misc.py index 45d5222a44..9578928253 100644 --- a/src/radical/pilot/utils/misc.py +++ b/src/radical/pilot/utils/misc.py @@ -4,7 +4,7 @@ import os import time -from typing import Union +from typing import List, Union import radical.utils as ru @@ -52,7 +52,7 @@ def get_rusage() -> str: # ------------------------------------------------------------------------------ # -def create_tar(tgt: str, dnames: list[str]) -> None: +def create_tar(tgt: str, dnames: List[str]) -> None: ''' Create a tarball on the file system which contains all given directories ''' diff --git a/tests/unit_tests/test_rm/test_base.py b/tests/unit_tests/test_rm/test_base.py index 740e7430f8..da15827844 100755 --- a/tests/unit_tests/test_rm/test_base.py +++ b/tests/unit_tests/test_rm/test_base.py @@ -73,8 +73,9 @@ def test_init_from_scratch(self, mocked_init): 'resource_cfg' : {}}) rm = ResourceManager(cfg=None, rcfg=None, log=None, prof=None) - rm._rcfg = cfg rm._cfg = cfg + rm._rcfg = ru.Config(cfg={}) + rm._log = mock.Mock() rm._prof = mock.Mock() @@ -111,7 +112,7 @@ def test_cores_cpus_map(self, mocked_init): tc_map = ru.read_json('%s/test_cases/test_cores_gpus_map.json' % base) - rm = ResourceManager(cfg=None, log=None, prof=None) + rm = ResourceManager(cfg=None, rcfg=None, log=None, prof=None) rm._log = mock.Mock() rm._prof = mock.Mock() @@ -120,16 +121,16 @@ def test_cores_cpus_map(self, mocked_init): tc_map['result']): def _init_from_scratch(rm_info_tc, rm_info_input): - - _rm_info = ru.TypedDict(rm_info_input) - _rm_info.update(rm_info_tc) + _rm_info = ru.TypedDict(rm_info_tc) + _rm_info.update(rm_info_input) return _rm_info from functools import partial - rm._cfg = ru.TypedDict(rm_cfg['resource_cfg']) - rm._rcfg = ru.TypedDict(rm_cfg) + rm._rcfg = ru.TypedDict(rm_cfg['rcfg']) + del rm_cfg['rcfg'] + rm._cfg = ru.TypedDict(rm_cfg) rm._init_from_scratch = partial(_init_from_scratch, rm_info) if result == 'AssertionError': diff --git a/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json b/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json index 298e63a5f1..62c433f915 100644 --- a/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json +++ b/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json @@ -81,11 +81,10 @@ "cores_per_node" : 8, "gpus_per_node" : 2, "lfs_size_per_node" : 0, - "resource_cfg" : { - "mem_per_node" : 128, - "system_architecture": { - "blocked_cores" : [] - }} + "rcfg" : { + "mem_per_node" : 128, + "system_architecture": {"blocked_cores" : []} + } }, { "nodes" : 1, @@ -94,12 +93,11 @@ "cores_per_node" : 12, "gpus_per_node" : 2, "lfs_size_per_node" : 0, - "resource_cfg" : { - "mem_per_node" : 128, - "system_architecture": { - "blocked_cores" : [0, 2], - "blocked_gpus" : [1] - }} + "rcfg" : { + "mem_per_node" : 128, + "system_architecture": {"blocked_cores" : [0, 2], + "blocked_gpus" : [1]} + } }, { # requested more NODES than allocated @@ -109,12 +107,11 @@ "cores_per_node" : 12, "gpus_per_node" : 2, "lfs_size_per_node" : 0, - "resource_cfg" : { - "mem_per_node" : 128, - "system_architecture": { - "blocked_cores" : [0, 2], - "blocked_gpus" : [1] - }} + "rcfg" : { + "mem_per_node" : 128, + "system_architecture": {"blocked_cores" : [0, 2], + "blocked_gpus" : [1]} + } }, { # requested more CORES than allocated @@ -124,12 +121,11 @@ "cores_per_node" : 12, "gpus_per_node" : 2, "lfs_size_per_node" : 0, - "resource_cfg" : { - "mem_per_node" : 128, - "system_architecture": { - "blocked_cores" : [0, 2], - "blocked_gpus" 
: [1] - }} + "rcfg" : { + "mem_per_node" : 128, + "system_architecture": {"blocked_cores" : [0, 2], + "blocked_gpus" : [1]} + } } ], "result": [ From 2307c59a17a8699367c8c6e60693e0e24f5e7635 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Thu, 10 Aug 2023 19:31:04 -0400 Subject: [PATCH 087/171] fixed RM related tests --- .../test_rm/test_cases/test_cores_gpus_map.json | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json b/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json index 62c433f915..86d0ef337a 100644 --- a/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json +++ b/tests/unit_tests/test_rm/test_cases/test_cores_gpus_map.json @@ -11,10 +11,6 @@ "mem" : 128 } ], - "requested_cores" : 8, - "requested_gpus" : 2, - "cores_per_node" : 8, - "gpus_per_node" : 2, "agent_node_list" : [], "service_node_list" : [] }, @@ -29,10 +25,6 @@ "mem" : 128 } ], - "requested_cores" : 8, - "requested_gpus" : 2, - "cores_per_node" : 8, - "gpus_per_node" : 2, "agent_node_list" : [], "service_node_list" : [] }, @@ -47,10 +39,6 @@ "mem" : 128 } ], - "requested_cores" : 8, - "requested_gpus" : 2, - "cores_per_node" : 8, - "gpus_per_node" : 2, "agent_node_list" : [], "service_node_list" : [] }, @@ -65,10 +53,6 @@ "mem" : 128 } ], - "requested_cores" : 8, - "requested_gpus" : 2, - "cores_per_node" : 8, - "gpus_per_node" : 2, "agent_node_list" : [], "service_node_list" : [] } From ebf6a000ef15eea92887a282d63c794486eb1785 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Thu, 10 Aug 2023 19:58:04 -0400 Subject: [PATCH 088/171] fixed Sched related tests --- src/radical/pilot/agent/resource_manager/base.py | 7 ++++--- .../test_scheduler/test_cases/test_base.json | 11 +++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/radical/pilot/agent/resource_manager/base.py b/src/radical/pilot/agent/resource_manager/base.py index c97e241dec..dcb9c8d432 100644 --- a/src/radical/pilot/agent/resource_manager/base.py +++ b/src/radical/pilot/agent/resource_manager/base.py @@ -71,6 +71,7 @@ class RMInfo(ru.TypedDict): 'threads_per_core' : 1, 'gpus_per_node' : 0, 'threads_per_gpu' : 1, + 'launch_methods' : {} } @@ -156,7 +157,7 @@ def __init__(self, cfg, rcfg, log, prof): self._set_info(rm_info) # set up launch methods even when initialized from registry info - self._prepare_launch_methods(rm_info) + self._prepare_launch_methods() # -------------------------------------------------------------------------- @@ -317,7 +318,7 @@ def init_from_scratch(self): # -------------------------------------------------------------------------- # - def _prepare_launch_methods(self, rm_info): + def _prepare_launch_methods(self): launch_methods = self._rm_info.launch_methods self._launchers = {} @@ -333,7 +334,7 @@ def _prepare_launch_methods(self, rm_info): lm_cfg.reg_addr = self._cfg.reg_addr lm_cfg.resource = self._cfg.resource self._launchers[lm_name] = rpa.LaunchMethod.create( - lm_name, lm_cfg, rm_info, self._log, self._prof) + lm_name, lm_cfg, self._rm_info, self._log, self._prof) except Exception as e: self._log.exception('skip lm %s', lm_name) diff --git a/tests/unit_tests/test_scheduler/test_cases/test_base.json b/tests/unit_tests/test_scheduler/test_cases/test_base.json index 81b7f8772a..3263ece94d 100644 --- a/tests/unit_tests/test_scheduler/test_cases/test_base.json +++ b/tests/unit_tests/test_scheduler/test_cases/test_base.json @@ -5,10 +5,7 @@ "config": { "pid" : "pid.0003", "resource_cfg" : { - "resource_manager": 
"FORK", - "launch_methods" : { - "FORK": {} - } + "resource_manager": "FORK" } }, "registry": { @@ -35,8 +32,10 @@ "mem_per_node" : 0, "requested_nodes": 1, "requested_cores": 8, - "cores_per_node" : 8, - "requested_gpus" : 0 + "requested_gpus" : 0, + "launch_methods" : { + "FORK": {} + } } }, "result": [ From b560c056544805b4668d8d9faa71ef19e4209f3c Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Thu, 10 Aug 2023 20:00:49 -0400 Subject: [PATCH 089/171] cleaned obsolete configs --- src/radical/pilot/configs/agent_rhea.json | 67 -------------- .../pilot/configs/agent_summit_sa.json | 92 ------------------- 2 files changed, 159 deletions(-) delete mode 100644 src/radical/pilot/configs/agent_rhea.json delete mode 100644 src/radical/pilot/configs/agent_summit_sa.json diff --git a/src/radical/pilot/configs/agent_rhea.json b/src/radical/pilot/configs/agent_rhea.json deleted file mode 100644 index ce20ed5225..0000000000 --- a/src/radical/pilot/configs/agent_rhea.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "staging_schema" : "staging", - "max_io_loglength" : 1024, - "network_interface" : "ipogif0", - - "heartbeat" : { - "interval" : 1.0, - "timeout" : 60.0 - }, - - "target" : "local", - "bridges" : { - "agent_staging_input_queue" : { "log_level" : "off", - "stall_hwm" : 1, - "bulk_size" : 1}, - "agent_scheduling_queue" : { "log_level" : "off", - "stall_hwm" : 1, - "bulk_size" : 1}, - "agent_executing_queue" : { "log_level" : "off", - "stall_hwm" : 1, - "bulk_size" : 1}, - "agent_staging_output_queue" : { "log_level" : "off", - "stall_hwm" : 1, - "bulk_size" : 1}, - - "agent_unschedule_pubsub" : {"log_level" : "off"}, - "agent_reschedule_pubsub" : {"log_level" : "off"}, - - "control_pubsub" : {"log_level" : "off"}, - "state_pubsub" : {"log_level" : "off"}, - "log_pubsub" : {"log_level" : "off"} - }, - - "agents": { - "agent_1": { - "target": "node", - "components": { - "AgentStagingInputComponent" : {"count" : 1} - } - }, - "agent_2": { - "target": "node", - "components": { - "AgentSchedulingComponent" : {"count" : 1} - } - }, - "agent_3": { - "target": "node", - "components": { - "AgentSchedulingComponent" : {"count" : 1} - } - }, - "agent_4": { - "target": "node", - "components": { - "AgentStagingOutputComponent" : {"count" : 1} - } - }, - "agent_5": { - "target": "node", - "components": { - "UpdateWorker" : {"count" : 1} - } - } - } -} - diff --git a/src/radical/pilot/configs/agent_summit_sa.json b/src/radical/pilot/configs/agent_summit_sa.json deleted file mode 100644 index 3e72b82a6d..0000000000 --- a/src/radical/pilot/configs/agent_summit_sa.json +++ /dev/null @@ -1,92 +0,0 @@ - -# this is the default agent config which is used if no other config is -# specified for an agent. It contains the minimal set of settings required for -# a functional pilot agent, without any component redundency. 
- -{ - "max_io_loglength" : 1024, - # "network_interface" : "ipogif0", - - "heartbeat" : { - "interval" : 1.0, - "timeout" : 60.0 - }, - - "target" : "local", - "bridges" : { - "agent_staging_input_queue" : { "log_level" : "error", - "stall_hwm" : 1, - "bulk_size" : 1}, - "agent_scheduling_queue" : { "log_level" : "error", - "stall_hwm" : 1, - "bulk_size" : 1}, - "agent_executing_queue" : { "log_level" : "error", - "stall_hwm" : 1, - "bulk_size" : 1}, - "agent_staging_output_queue" : { "log_level" : "error", - "stall_hwm" : 1, - "bulk_size" : 1}, - - "agent_unschedule_pubsub" : {"log_level" : "error"}, - "agent_schedule_pubsub" : {"log_level" : "error"}, - - "control_pubsub" : {"log_level" : "error"}, - "state_pubsub" : {"log_level" : "error"}, - "log_pubsub" : {"log_level" : "error"} - }, - - "components" : { - # "AgentStagingInputComponent" : {"count" : 1}, - # "AgentExecutingComponent" : {"count" : 1}, - # "AgentStagingOutputComponent" : {"count" : 1}, - "AgentSchedulingComponent" : {"count" : 1} - }, - - "agents": { - "agent_1": { - "target": "node", - "components": { - "AgentStagingInputComponent" : {"count" : 1}, - "AgentStagingOutputComponent" : {"count" : 1}, - "AgentExecutingComponent" : {"count" : 1} - } - }, - "agent_2": { - "target": "node", - "components": { - "AgentStagingInputComponent" : {"count" : 1}, - "AgentStagingOutputComponent" : {"count" : 1}, - "AgentExecutingComponent" : {"count" : 1} - } - }, - "agent_3": { - "target": "node", - "components": { - "AgentStagingInputComponent" : {"count" : 1}, - "AgentStagingOutputComponent" : {"count" : 1}, - "AgentExecutingComponent" : {"count" : 1} - } - }, - "agent_4": { - "target": "node", - "components": { - "AgentStagingInputComponent" : {"count" : 1}, - "AgentStagingOutputComponent" : {"count" : 1}, - "AgentExecutingComponent" : {"count" : 1} - } - # }, - # "agent_5": { - # "target": "node", - # "components": { - # "AgentExecutingComponent" : {"count" : 1} - # } - # }, - # "agent_6": { - # "target": "node", - # "components": { - # "AgentExecutingComponent" : {"count" : 1} - # } - } - } -} - From 05e09e4b0c1e45950cade4b71545c3abfbdc9142 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Mon, 21 Aug 2023 10:36:39 -0400 Subject: [PATCH 090/171] cleanup --- src/radical/pilot/messages.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/radical/pilot/messages.py b/src/radical/pilot/messages.py index 758d3a0daa..d6395d9578 100644 --- a/src/radical/pilot/messages.py +++ b/src/radical/pilot/messages.py @@ -1,6 +1,6 @@ -from typing import Optional, Dict, Tuple, Any +from typing import Any import radical.utils as ru @@ -9,8 +9,7 @@ # class HeartbeatMessage(ru.Message): - - _schema = {'uid' : str } + _schema = {'uid' : str} _defaults = {'_msg_type': 'heartbeat', 'uid' : None} @@ -35,6 +34,7 @@ class RPCRequestMessage(ru.Message): 'args' : [], 'kwargs' : {}} + ru.Message.register_msg_type('rpc_req', RPCRequestMessage) @@ -58,7 +58,7 @@ class RPCResultMessage(ru.Message): # def __init__(self, rpc_req=None, from_dict=None, **kwargs): - # when constfructed from a request message copy the uid + # when constructed from a request message copy the uid if rpc_req: if not from_dict: From 11198912d194fa5fe7f9cb98af7e1f298457d008 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Mon, 21 Aug 2023 11:33:48 -0400 Subject: [PATCH 091/171] fixed bootstrapping for sub-agents --- src/radical/pilot/agent/agent_0.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git 
a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 6f4aabe269..cfad834395 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -456,14 +456,14 @@ def _start_sub_agents(self): # the configs are written, and the sub-agents can be started. To know # how to do that we create the agent launch method, have it creating - # the respective command lines per agent instance, and run via - # popen. - # + # the respective command lines per agent instance, and run via popen. + + bs_name = '%s/bootstrap_2.sh' for idx, sa in enumerate(self._cfg['agents']): target = self._cfg['agents'][sa]['target'] - cmdline = None + bs_args = [self._sid, self.cfg.reg_addr, sa] if target not in ['local', 'node']: @@ -472,10 +472,8 @@ def _start_sub_agents(self): if target == 'local': # start agent locally - bs_name = '%s/bootstrap_2.sh' % (self._pwd) - args = ' '.join([self._sid, self.cfg.reg_addr, sa]) - cmdline = '/bin/sh -l %s/%s %s' % (self._pwd, bs_name, args) - + bs_path = bs_name % self._pwd + cmdline = '/bin/sh -l %s' % ' '.join([bs_path] + bs_args) else: # target == 'node': @@ -490,7 +488,7 @@ def _start_sub_agents(self): # out for the moment, which will make this unable to # work with a number of launch methods. Can the # offset computation be moved to the ResourceManager? - bs_name = '%s/bootstrap_2.sh' % (self._pwd) + launch_script = '%s/%s.launch.sh' % (self._pwd, sa) exec_script = '%s/%s.exec.sh' % (self._pwd, sa) @@ -502,7 +500,7 @@ def _start_sub_agents(self): 'ranks' : 1, 'cores_per_rank': self._rm.info.cores_per_node, 'executable' : '/bin/sh', - 'arguments' : [bs_name, self._sid, self.cfg.reg_addr, sa] + 'arguments' : [bs_name % self._pwd] + bs_args }).as_dict(), 'slots': {'ranks' : [{'node_name': node['node_name'], 'node_id' : node['node_id'], @@ -534,8 +532,7 @@ def _start_sub_agents(self): tmp = '#!/bin/sh\n\n' tmp += '. 
./env/agent.env\n' - tmp += '/bin/sh -l ./bootstrap_2.sh %s %s %s\n\n' \ - % (self._sid, self.cfg.reg_addr, sa) + tmp += '/bin/sh -l %s\n\n' % ' '.join([bs_name % '.'] + bs_args) with ru.ru_open(exec_script, 'w') as fout: fout.write(tmp) From 0b4dafa0920620fc1504d3c0ec42ee098f04c8fe Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Mon, 21 Aug 2023 11:45:27 -0400 Subject: [PATCH 092/171] fixed requirements (using RU devel/nodb temporary) --- requirements-docs.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-docs.txt b/requirements-docs.txt index 8c5331dfc1..155199dd7a 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -8,7 +8,7 @@ nbsphinx==0.8.12 python-dotenv[cli] sphinx_copybutton sphinx_rtd_theme>=0.5.1 -radical.pilot>=1.14 +radical.pilot>=1.37 myst_parser docutils==0.17.1 diff --git a/requirements.txt b/requirements.txt index 6faa31d31b..2ad89d4638 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -radical.utils @ git+https://github.com/radical-cybertools/radical.utils@feature/use_registry +radical.utils @ git+https://github.com/radical-cybertools/radical.utils@devel/nodb_2 radical.saga>=1.12 radical.gtod setproctitle From ca5f1aa0e6e3e7d4578b45c52d0c3a8c610994d2 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Mon, 21 Aug 2023 11:50:01 -0400 Subject: [PATCH 093/171] cleanup --- src/radical/pilot/session.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index b2fdde265d..846f08537a 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -18,8 +18,10 @@ from . import constants as rpc from . import utils as rpu -from .proxy import Proxy -from .messages import HeartbeatMessage + +from .messages import HeartbeatMessage +from .proxy import Proxy +from .resource_description import ResourceDescription # ------------------------------------------------------------------------------ @@ -35,6 +37,8 @@ class _CloseOptions(ru.TypedDict): """ + _check = True + _schema = { 'download' : bool, 'terminate': bool @@ -46,16 +50,6 @@ class _CloseOptions(ru.TypedDict): } - # -------------------------------------------------------------------------- - # - def __init__(self, from_dict): - - super().__init__(from_dict) - self._verify() - -from .resource_description import ResourceDescription - - # ------------------------------------------------------------------------------ # class Session(rs.Session): From 6dd628952fa92b14045466abe0fcdba1d98b1a4c Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Mon, 21 Aug 2023 11:51:13 -0400 Subject: [PATCH 094/171] fixed typo --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2ad89d4638..3a05a2fff5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -radical.utils @ git+https://github.com/radical-cybertools/radical.utils@devel/nodb_2 +radical.utils @ git+https://github.com/radical-cybertools/radical.utils@devel_nodb_2 radical.saga>=1.12 radical.gtod setproctitle From 49cb91488c813a9eed31ef7af6f08132f7091970 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Mon, 21 Aug 2023 11:59:22 -0400 Subject: [PATCH 095/171] version RP devel/nodb temporary --- requirements-docs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-docs.txt b/requirements-docs.txt index 155199dd7a..f1beeafde5 100644 --- a/requirements-docs.txt +++ 
b/requirements-docs.txt @@ -8,7 +8,7 @@ nbsphinx==0.8.12 python-dotenv[cli] sphinx_copybutton sphinx_rtd_theme>=0.5.1 -radical.pilot>=1.37 +radical.pilot @ git+https://github.com/radical-cybertools/radical.pilot@feature/nodb_2 myst_parser docutils==0.17.1 From e17b1ec470beddcaa8f2463a198321d9b2de3085 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Mon, 21 Aug 2023 12:32:19 -0400 Subject: [PATCH 096/171] fixed config setup (tutorial) --- docs/source/tutorials/configuration.ipynb | 57 +++++++++++------------ tests/component_tests/test_session.py | 2 +- 2 files changed, 29 insertions(+), 30 deletions(-) diff --git a/docs/source/tutorials/configuration.ipynb b/docs/source/tutorials/configuration.ipynb index ca7e43ecc7..0ca85f064c 100644 --- a/docs/source/tutorials/configuration.ipynb +++ b/docs/source/tutorials/configuration.ipynb @@ -130,22 +130,21 @@ " \"description\" : \"Short description of the resource\",\n", " \"notes\" : \"Notes about resource usage\",\n", "\n", - " \"schemas\" : [\"local\", \"ssh\", \"batch\", \"interactive\"],\n", - " \"local\" :\n", - " {\n", - " \"job_manager_endpoint\" : \"slurm://frontera.tacc.utexas.edu/\",\n", - " \"filesystem_endpoint\" : \"file://frontera.tacc.utexas.edu/\"\n", - " },\n", - " \"ssh\" :\n", - " {\n", - " \"job_manager_endpoint\" : \"slurm+ssh://frontera.tacc.utexas.edu/\",\n", - " \"filesystem_endpoint\" : \"sftp://frontera.tacc.utexas.edu/\"\n", - " },\n", - " \"batch\" : \"interactive\",\n", - " \"interactive\" :\n", - " {\n", - " \"job_manager_endpoint\" : \"fork://localhost/\",\n", - " \"filesystem_endpoint\" : \"file://localhost/\"\n", + " \"default_schema\" : \"local\",\n", + " \"schemas\" : {\n", + " \"local\" : {\n", + " \"job_manager_endpoint\": \"slurm://frontera.tacc.utexas.edu/\",\n", + " \"filesystem_endpoint\" : \"file://frontera.tacc.utexas.edu/\"\n", + " },\n", + " \"ssh\" : {\n", + " \"job_manager_endpoint\": \"slurm+ssh://frontera.tacc.utexas.edu/\",\n", + " \"filesystem_endpoint\" : \"sftp://frontera.tacc.utexas.edu/\"\n", + " },\n", + " \"batch\" : \"interactive\",\n", + " \"interactive\" : {\n", + " \"job_manager_endpoint\": \"fork://localhost/\",\n", + " \"filesystem_endpoint\" : \"file://localhost/\"\n", + " },\n", " },\n", "\n", " \"default_queue\" : \"production\",\n", @@ -404,10 +403,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[94mnew session: \u001b[39m\u001b[0m[rp.session.three.mturilli.019495.0003]\u001b[39m\u001b[0m\u001b[94m \\\n", - "database : \u001b[39m\u001b[0m[mongodb://rct-tutorial:****@95.217.193.116:27017/rct-tutorial]\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[94mcreate pilot manager\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" + "\u001B[94mnew session: \u001B[39m\u001B[0m[rp.session.three.mturilli.019495.0003]\u001B[39m\u001B[0m\u001B[94m \\\n", + "database : \u001B[39m\u001B[0m[mongodb://rct-tutorial:****@95.217.193.116:27017/rct-tutorial]\u001B[39m\u001B[0m\u001B[92m ok\n", + "\u001B[39m\u001B[0m\u001B[94mcreate pilot manager\u001B[39m\u001B[0m\u001B[92m ok\n", + "\u001B[39m\u001B[0m" ] } ], @@ -469,9 +468,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[94msubmit 1 pilot(s)\u001b[39m\u001b[0m\n", - " pilot.0000 tacc.frontera_tutorial 56 cores 0 gpus\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" + "\u001B[94msubmit 1 pilot(s)\u001B[39m\u001B[0m\n", + " pilot.0000 tacc.frontera_tutorial 56 cores 0 gpus\u001B[39m\u001B[0m\u001B[92m ok\n", + "\u001B[39m\u001B[0m" ] } ], @@ -576,13 +575,13 @@ "name": "stderr", 
"output_type": "stream", "text": [ - "\u001b[94mclosing session rp.session.three.mturilli.019495.0003\u001b[39m\u001b[0m\u001b[94m \\\n", - "close pilot manager\u001b[39m\u001b[0m\u001b[94m \\\n", + "\u001B[94mclosing session rp.session.three.mturilli.019495.0003\u001B[39m\u001B[0m\u001B[94m \\\n", + "close pilot manager\u001B[39m\u001B[0m\u001B[94m \\\n", "wait for 1 pilot(s)\n", - " \u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[94msession lifetime: 13.1s\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" + " \u001B[39m\u001B[0m\u001B[92m ok\n", + "\u001B[39m\u001B[0m\u001B[92m ok\n", + "\u001B[39m\u001B[0m\u001B[94msession lifetime: 13.1s\u001B[39m\u001B[0m\u001B[92m ok\n", + "\u001B[39m\u001B[0m" ] } ], diff --git a/tests/component_tests/test_session.py b/tests/component_tests/test_session.py index c9260eda02..33dc9eed7b 100755 --- a/tests/component_tests/test_session.py +++ b/tests/component_tests/test_session.py @@ -123,7 +123,7 @@ def test_resource_schema_alias(self, mocked_config, *args, **kwargs): mocked_config.return_value = ru.TypedDict({ 'local': { 'test': { - 'default_schema' :'schema_origin', + 'default_schema' : 'schema_origin', 'schemas' : { 'schema_origin' : {'param_0': 'value_0'}, 'schema_alias' : 'schema_origin', From 19545bdd860fa8976f37259c4f26715ab0c03a1a Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Mon, 21 Aug 2023 12:35:17 -0400 Subject: [PATCH 097/171] fixed creation of HeartbeatMessage instance --- bin/radical-pilot-agent_n | 2 +- bin/radical-pilot-component | 2 +- src/radical/pilot/session.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/radical-pilot-agent_n b/bin/radical-pilot-agent_n index e2200eb5ad..48ea687b1c 100755 --- a/bin/radical-pilot-agent_n +++ b/bin/radical-pilot-agent_n @@ -72,7 +72,7 @@ def wrapped_main(sid, reg_addr, uid, log, prof): hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub, log=log, prof=prof) def hb_beat_cb(): - hb_pub.put('heartbeat', HeartbeatMessage(uid)) + hb_pub.put('heartbeat', HeartbeatMessage(uid=uid)) def hb_term_cb(hb_uid): agent.stop() diff --git a/bin/radical-pilot-component b/bin/radical-pilot-component index 8c85a5145a..62a153238b 100755 --- a/bin/radical-pilot-component +++ b/bin/radical-pilot-component @@ -93,7 +93,7 @@ def wrapped_main(sid, reg_addr, uid, log, prof): hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub, log=log, prof=prof) def hb_beat_cb(): - hb_pub.put('heartbeat', HeartbeatMessage(uid)) + hb_pub.put('heartbeat', HeartbeatMessage(uid=uid)) def hb_term_cb(hb_uid): comp.stop() diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 846f08537a..9810aa70a1 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -552,7 +552,7 @@ def _start_heartbeat(self): def _hb_beat_cb(): # called on every heartbeat: cfg.heartbeat.interval` # publish own heartbeat - self._hb_pub.put('heartbeat', HeartbeatMessage(self._uid)) + self._hb_pub.put('heartbeat', HeartbeatMessage(uid=self._uid)) # also update proxy heartbeat if self._proxy: From d711940401dd24d70d37d7c06186e90d41d2b418 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Mon, 21 Aug 2023 12:44:51 -0400 Subject: [PATCH 098/171] temporary fix for ResourceDescription --- src/radical/pilot/resource_description.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/radical/pilot/resource_description.py b/src/radical/pilot/resource_description.py index feb8f35606..a82638a35f 100644 --- 
a/src/radical/pilot/resource_description.py +++ b/src/radical/pilot/resource_description.py @@ -61,6 +61,11 @@ class ResourceDescription(ru.TypedDict): DEFAULT_SCHEMA : str , SCHEMAS : {str: AccessSchema}, + # FIXME: AM - need to resolve since in Session it is moved into RD + # `_get_resource_sandbox` -> `KeyError: 'filesystem_endpoint'` + JOB_MANAGER_ENDPOINT : str, + FILESYSTEM_ENDPOINT : str, + DEFAULT_REMOTE_WORKDIR : str , DEFAULT_QUEUE : str , RESOURCE_MANAGER : str , @@ -90,6 +95,11 @@ class ResourceDescription(ru.TypedDict): DEFAULT_SCHEMA : '' , SCHEMAS : list() , + # FIXME: AM - need to resolve since in Session it is moved into RD + # `_get_resource_sandbox` -> `KeyError: 'filesystem_endpoint'` + JOB_MANAGER_ENDPOINT : None, + FILESYSTEM_ENDPOINT : None, + DEFAULT_REMOTE_WORKDIR : '' , DEFAULT_QUEUE : '' , RESOURCE_MANAGER : '' , From 348addbe3b28a376befcad6af040ac516a619d29 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Mon, 21 Aug 2023 16:55:55 -0400 Subject: [PATCH 099/171] fixed resource configuration handling --- .../pilot/configs/resource_princeton.json | 18 ++- src/radical/pilot/resource_description.py | 75 ++++++------ src/radical/pilot/session.py | 56 ++++----- tests/component_tests/test_session.py | 110 ++++++++---------- 4 files changed, 125 insertions(+), 134 deletions(-) diff --git a/src/radical/pilot/configs/resource_princeton.json b/src/radical/pilot/configs/resource_princeton.json index 70ecc39827..a984227f22 100644 --- a/src/radical/pilot/configs/resource_princeton.json +++ b/src/radical/pilot/configs/resource_princeton.json @@ -84,13 +84,12 @@ "job_manager_endpoint": "slurm://tigercpu.princeton.edu/", "job_manager_hop" : "fork://localhost/", "filesystem_endpoint" : "file://localhost/" + }, + "ssh" : { + "job_manager_endpoint": "slurm+ssh://tigercpu.princeton.edu/", + "filesystem_endpoint" : "sftp://tigercpu.princeton.edu/" } }, - "ssh" : - { - "job_manager_endpoint" : "slurm+ssh://tigercpu.princeton.edu/", - "filesystem_endpoint" : "sftp://tigercpu.princeton.edu/" - }, "default_queue" : "cpu", "resource_manager" : "SLURM", "cores_per_node" : 40, @@ -133,13 +132,12 @@ "job_manager_endpoint": "slurm://tigercpu.princeton.edu/", "job_manager_hop" : "fork://localhost/", "filesystem_endpoint" : "file://localhost/" + }, + "ssh" : { + "job_manager_endpoint": "slurm+ssh://tigergpu.princeton.edu/", + "filesystem_endpoint" : "sftp://tigergpu.princeton.edu/" } }, - "ssh" : - { - "job_manager_endpoint" : "slurm+ssh://tigergpu.princeton.edu/", - "filesystem_endpoint" : "sftp://tigergpu.princeton.edu/" - }, "default_queue" : "gpu", "resource_manager" : "SLURM", "cores_per_node" : 28, diff --git a/src/radical/pilot/resource_description.py b/src/radical/pilot/resource_description.py index a82638a35f..8f36893229 100644 --- a/src/radical/pilot/resource_description.py +++ b/src/radical/pilot/resource_description.py @@ -9,16 +9,18 @@ DEFAULT_SCHEMA = 'default_schema' SCHEMAS = 'schemas' JOB_MANAGER_ENDPOINT = 'job_manager_endpoint' +JOB_MANAGER_HOP = 'job_manager_hop' FILESYSTEM_ENDPOINT = 'filesystem_endpoint' DEFAULT_REMOTE_WORKDIR = 'default_remote_workdir' DEFAULT_QUEUE = 'default_queue' -RESOURCE_MANAGER = 'default_manager' +RESOURCE_MANAGER = 'resource_manager' AGENT_CONFIG = 'agent_config' AGENT_SCHEDULER = 'agent_scheduler' AGENT_SPAWNER = 'agent_spawner' PRE_BOOTSTRAP_0 = 'pre_bootstrap_0' PRE_BOOTSTRAP_1 = 'pre_bootstrap_1' RP_VERSION = 'rp_version' +VIRTENV = 'virtenv' VIRTENV_MODE = 'virtenv_mode' VIRTENV_DIST = 'virtenv_dist' PYTHON_DIST = 'python_dist' @@ -28,23 
+30,30 @@ MEM_PER_NODE = 'mem_per_node' CORES_PER_NODE = 'cores_per_node' GPUS_PER_NODE = 'gpus_per_node' -BLOCKED_CORES = 'blocked_cores' -BLOCKED_GPUS = 'blocker_gpus' SYSTEM_ARCHITECTURE = 'system_architecture' +FAKE_RESOURCES = 'fake_resources' +MANDATORY_ARGS = 'mandatory_args' +FORWARD_TUNNEL_ENDPOINT = 'forward_tunnel_endpoint' + +NEW_SESSION_PER_TASK = 'new_session_per_task' +TASK_PRE_EXEC = 'task_pre_exec' + # ------------------------------------------------------------------------------ # class AccessSchema(ru.TypedDict): _schema = { - JOB_MANAGER_ENDPOINT: str, - FILESYSTEM_ENDPOINT : str, + JOB_MANAGER_ENDPOINT: str, + JOB_MANAGER_HOP : str, + FILESYSTEM_ENDPOINT : str, } _defaults = { - JOB_MANAGER_ENDPOINT: None, - FILESYSTEM_ENDPOINT : None, + JOB_MANAGER_ENDPOINT: None, + JOB_MANAGER_HOP : None, + FILESYSTEM_ENDPOINT : None, } @@ -63,8 +72,9 @@ class ResourceDescription(ru.TypedDict): # FIXME: AM - need to resolve since in Session it is moved into RD # `_get_resource_sandbox` -> `KeyError: 'filesystem_endpoint'` - JOB_MANAGER_ENDPOINT : str, - FILESYSTEM_ENDPOINT : str, + JOB_MANAGER_ENDPOINT : str , + JOB_MANAGER_HOP : str , + FILESYSTEM_ENDPOINT : str , DEFAULT_REMOTE_WORKDIR : str , DEFAULT_QUEUE : str , @@ -75,6 +85,7 @@ class ResourceDescription(ru.TypedDict): PRE_BOOTSTRAP_0 : [str] , PRE_BOOTSTRAP_1 : [str] , RP_VERSION : str , + VIRTENV : str , VIRTENV_MODE : str , VIRTENV_DIST : str , PYTHON_DIST : str , @@ -84,9 +95,13 @@ class ResourceDescription(ru.TypedDict): MEM_PER_NODE : int , CORES_PER_NODE : int , GPUS_PER_NODE : int , - BLOCKED_CORES : [int] , - BLOCKED_GPUS : [int] , - SYSTEM_ARCHITECTURE : {str: str} , + SYSTEM_ARCHITECTURE : {str: None}, + + FAKE_RESOURCES : bool , + MANDATORY_ARGS : [str] , + FORWARD_TUNNEL_ENDPOINT: str , + NEW_SESSION_PER_TASK : bool , + TASK_PRE_EXEC : [str] , } _defaults = { @@ -97,8 +112,9 @@ class ResourceDescription(ru.TypedDict): # FIXME: AM - need to resolve since in Session it is moved into RD # `_get_resource_sandbox` -> `KeyError: 'filesystem_endpoint'` - JOB_MANAGER_ENDPOINT : None, - FILESYSTEM_ENDPOINT : None, + JOB_MANAGER_ENDPOINT : None , + JOB_MANAGER_HOP : None , + FILESYSTEM_ENDPOINT : None , DEFAULT_REMOTE_WORKDIR : '' , DEFAULT_QUEUE : '' , @@ -106,36 +122,27 @@ class ResourceDescription(ru.TypedDict): AGENT_CONFIG : 'default' , AGENT_SCHEDULER : 'CONTINUOUS', AGENT_SPAWNER : 'POPEN' , - PRE_BOOTSTRAP_0 : list() , - PRE_BOOTSTRAP_1 : list() , + PRE_BOOTSTRAP_0 : [] , + PRE_BOOTSTRAP_1 : [] , RP_VERSION : '' , + VIRTENV : '' , VIRTENV_MODE : '' , VIRTENV_DIST : '' , PYTHON_DIST : 'default' , - LAUNCH_METHODS : dict() , + LAUNCH_METHODS : {} , LFS_PATH_PER_NODE : '' , LFS_SIZE_PER_NODE : '' , MEM_PER_NODE : 0 , CORES_PER_NODE : 0 , GPUS_PER_NODE : 0 , - BLOCKED_CORES : [int] , - BLOCKED_GPUS : [int] , - SYSTEM_ARCHITECTURE : dict() , - } - + SYSTEM_ARCHITECTURE : {} , - # -------------------------------------------------------------------------- - # - def __init__(self, from_dict=None): - - super().__init__(from_dict=from_dict) - - - # -------------------------------------------------------------------------- - # - def _verify(self): - - pass + FAKE_RESOURCES : False , + MANDATORY_ARGS : [] , + FORWARD_TUNNEL_ENDPOINT: '' , + NEW_SESSION_PER_TASK : True , + TASK_PRE_EXEC : [] , + } # ------------------------------------------------------------------------------ diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 9810aa70a1..a1ffef8e76 100644 --- a/src/radical/pilot/session.py +++ 
b/src/radical/pilot/session.py @@ -4,7 +4,6 @@ import os import copy -import glob import time from typing import Optional @@ -380,28 +379,34 @@ def _init_cfg_from_scratch(self): self._cfg = ru.Config('radical.pilot.session', name=cfg_name, cfg=self._cfg) - self._rcfgs = ru.Config() - self._rcfg = ru.Config() # the local resource config, if known rcfgs = ru.Config('radical.pilot.resource', name='*', expand=False) + rcfgs_ext = {} for site in rcfgs: - self._rcfgs[site] = ru.Config() - for res,rcfg in rcfgs[site].items(): - self._rcfgs[site][res] = ru.Config() - for schema in rcfg['schemas']: - self._rcfgs[site][res][schema] = ru.Config() - self._rcfgs[site][res][schema] = ru.Config( - from_dict=rcfgs[site][res]) - ru.dict_merge(self._rcfgs[site][res][schema], + rcfgs_ext[site] = {} + for res, rcfg in rcfgs[site].items(): + rcfgs_ext[site][res] = { + 'default_schema': rcfg['default_schema'], + 'schemas' : rcfg.get('schemas', {}) + } + for schema in rcfg.get('schemas', {}): + while isinstance(rcfg['schemas'][schema], str): + tgt = rcfg['schemas'][schema] + rcfg['schemas'][schema] = rcfg['schemas'][tgt] + for schema in rcfg.get('schemas', {}): + rcfgs_ext[site][res][schema] = rcfgs[site][res].as_dict() + ru.dict_merge(rcfgs_ext[site][res][schema], rcfgs[site][res]['schemas'][schema]) - del self._rcfgs[site][res][schema]['schemas'] + del rcfgs_ext[site][res][schema]['default_schema'] - for site in self._rcfgs: - for res,rcfg in self._rcfgs[site].items(): - for schema in rcfg.get('schemas', []): - rd = ResourceDescription(from_dict=rcfg['schemas'][schema]) + for site in rcfgs_ext: + for res, rcfg in rcfgs_ext[site].items(): + for schema in rcfg.get('schemas', {}): + rd = ResourceDescription(from_dict=rcfg[schema]) rd.verify() + self._rcfgs = ru.Config(from_dict=rcfgs_ext) + self._rcfg = ru.Config() # the local resource config, if known # set essential config values for *this* specific session self._cfg['sid'] = self._uid @@ -1206,27 +1211,16 @@ def get_resource_config(self, resource, schema=None): if not schema: schema = resource_cfg.get('default_schema') - while schema: + if schema: - if schema not in resource_cfg['schemas']: + if schema not in resource_cfg['schemas']: raise RuntimeError("schema %s unknown for resource %s" % (schema, resource)) - cnt = 0 - val = resource_cfg['schemas'][schema] - - if isinstance(val, str): - schema = val - cnt += 1 - if cnt > 10: - break - continue - - for key in val: + for key in resource_cfg[schema]: # merge schema specific resource keys into the # resource config - resource_cfg[key] = val[key] - break + resource_cfg[key] = resource_cfg[schema][key] resource_cfg.label = resource return resource_cfg diff --git a/tests/component_tests/test_session.py b/tests/component_tests/test_session.py index 33dc9eed7b..b324e4826d 100755 --- a/tests/component_tests/test_session.py +++ b/tests/component_tests/test_session.py @@ -2,7 +2,7 @@ # pylint: disable=protected-access, unused-argument, no-value-for-parameter -__copyright__ = 'Copyright 2020-2022, The RADICAL-Cybertools Team' +__copyright__ = 'Copyright 2020-2023, The RADICAL-Cybertools Team' __license__ = 'MIT' import glob @@ -22,38 +22,21 @@ class TestSession(TestCase): _cleanup_files = [] - - def se_init(self): - - self._rep = mock.Mock() - self._reg = mock.Mock() - self._log = mock.Mock() - self._prof = mock.Mock() - - self._rcfgs = ru.Config('radical.pilot.resource', name='*', - expand=False) - - for site in self._rcfgs: - for rcfg in self._rcfgs[site].values(): - for schema in rcfg.get('schemas', []): - while 
isinstance(rcfg.get(schema), str): - tgt = rcfg[schema] - rcfg[schema] = rcfg[tgt] - - # -------------------------------------------------------------------------- # @classmethod - @mock.patch.object(Session, '_init_primary', side_effect=se_init, - autospec=True) @mock.patch.object(Session, '_get_logger') @mock.patch.object(Session, '_get_profiler') @mock.patch.object(Session, '_get_reporter') def setUpClass(cls, *args, **kwargs) -> None: - cls._session = Session() - cls._cleanup_files.append(cls._session.uid) + def init_primary(self): + self._reg = mock.Mock() + self._init_cfg_from_scratch() + with mock.patch.object(Session, '_init_primary', new=init_primary): + cls._session = Session() + cls._cleanup_files.append(cls._session.uid) # -------------------------------------------------------------------------- # @@ -112,52 +95,61 @@ def test_get_resource_config(self): # -------------------------------------------------------------------------- # - @mock.patch.object(Session, '_init_primary', side_effect=se_init, - autospec=True) @mock.patch.object(Session, '_get_logger') @mock.patch.object(Session, '_get_profiler') @mock.patch.object(Session, '_get_reporter') - @mock.patch('radical.pilot.session.ru.Config') - def test_resource_schema_alias(self, mocked_config, *args, **kwargs): - - mocked_config.return_value = ru.TypedDict({ - 'local': { - 'test': { - 'default_schema' : 'schema_origin', - 'schemas' : { - 'schema_origin' : {'param_0': 'value_0'}, - 'schema_alias' : 'schema_origin', - 'schema_alias_alias': 'schema_alias' - } + def test_resource_schema_alias(self, *args, **kwargs): + + base_dir = os.path.join(os.path.expanduser('~'), '.radical') + self._cleanup_files.append(base_dir) + + user_cfg_dir = os.path.join(base_dir, 'pilot', 'configs') + ru.rec_makedir(user_cfg_dir) + + facility_cfg = { + 'test': { + 'default_schema' : 'schema_origin', + 'schemas' : { + 'schema_origin' : {'job_manager_hop': 'value_0'}, + 'schema_alias' : 'schema_origin', + 'schema_alias_alias': 'schema_alias' } } - }) + } + ru.write_json(facility_cfg, '%s/resource_facility.json' % user_cfg_dir) + + def init_primary(self): + self._reg = mock.Mock() + self._init_cfg_from_scratch() - s_alias = Session() + with mock.patch.object(Session, '_init_primary', new=init_primary): + s_alias = Session() + self._cleanup_files.append(s_alias.uid) self.assertEqual( - s_alias._rcfgs.local.test.schema_origin, - s_alias._rcfgs.local.test.schema_alias) + s_alias._rcfgs.facility.test.schema_origin, + s_alias._rcfgs.facility.test.schema_alias) self.assertEqual( - s_alias._rcfgs.local.test.schema_origin, - s_alias._rcfgs.local.test.schema_alias_alias) + s_alias._rcfgs.facility.test.schema_origin, + s_alias._rcfgs.facility.test.schema_alias_alias) self.assertEqual( - s_alias.get_resource_config('local.test', 'schema_origin'), - s_alias.get_resource_config('local.test', 'schema_alias_alias')) - - self._cleanup_files.append(s_alias.uid) - - with self.assertRaises(KeyError): - # schema alias refers to unknown schema - mocked_config.return_value = ru.TypedDict({ - 'local': { - 'test': { - 'schemas' : ['schema_alias_error'], - 'schema_alias_error': 'unknown_schema' - } + s_alias.get_resource_config('facility.test', 'schema_origin'), + s_alias.get_resource_config('facility.test', 'schema_alias_alias')) + + # schema alias refers to unknown schema + facility_cfg = { + 'test': { + 'default_schema': 'schema_alias_error', + 'schemas': { + 'schemas': ['schema_alias_error'], + 'schema_alias_error': 'unknown_schema' } - }) - Session() + } + } + 
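
The aliasing behaviour this test pins down matches the resolution loop added to `Session._init_cfg_from_scratch` above: string-valued entries in `schemas` are chased until a concrete endpoint dict is reached, and an alias pointing at an unknown schema surfaces as the `KeyError` asserted just below. A standalone sketch of that resolution step, with an illustrative helper name and sample dict (not code from the repository):

    def resolve_schema_aliases(schemas):
        # Chase string-valued entries ('alias' -> 'alias' -> ... -> dict)
        # until every schema name maps to a concrete endpoint dict.
        # Unknown targets raise KeyError; cyclic aliases would loop forever.
        resolved = dict(schemas)
        for name in resolved:
            while isinstance(resolved[name], str):
                resolved[name] = resolved[resolved[name]]
        return resolved

    schemas = {'schema_origin'     : {'job_manager_hop': 'value_0'},
               'schema_alias'      : 'schema_origin',
               'schema_alias_alias': 'schema_alias'}
    assert resolve_schema_aliases(schemas)['schema_alias_alias'] == \
           schemas['schema_origin']
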
ru.write_json(facility_cfg, '%s/resource_facility.json' % user_cfg_dir) + with self.assertRaises(KeyError): + with mock.patch.object(Session, '_init_primary', new=init_primary): + Session() # -------------------------------------------------------------------------- # From 783d53716b352a1c9a61deac53f384c5ffb13374 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Mon, 21 Aug 2023 17:00:45 -0400 Subject: [PATCH 100/171] fixed Session close options --- src/radical/pilot/session.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index a1ffef8e76..668ff21960 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -40,12 +40,14 @@ class _CloseOptions(ru.TypedDict): _schema = { 'download' : bool, - 'terminate': bool + 'terminate': bool, + 'cleanup' : bool # FIXME: to be removed } _defaults = { 'download' : False, - 'terminate': True + 'terminate': True, + 'cleanup' : True # FIXME: to be removed } From d7a30c3dc35a01eeab95e97024e5748fa150b03a Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Mon, 21 Aug 2023 22:41:23 -0400 Subject: [PATCH 101/171] fixed TMGR Staging Input --- src/radical/pilot/tmgr/staging_input/default.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py index 1fbe782dab..07d44b8fef 100644 --- a/src/radical/pilot/tmgr/staging_input/default.py +++ b/src/radical/pilot/tmgr/staging_input/default.py @@ -205,7 +205,7 @@ def work(self, tasks): if not pilot: # we don't feel inclined to optimize for unknown pilots - self._log.debug('pid unknown - skip optimizion', pid) + self._log.debug('pid unknown - skip optimization', pid) continue task_sboxes = sboxes[pid] @@ -288,13 +288,13 @@ def work(self, tasks): if no_staging_tasks: - - # nothing to stage, push to the agent - self._advance_tasks(no_staging_tasks[pid], pid) + for pid in no_staging_tasks: + # nothing to stage, push to the agent + self._advance_tasks(no_staging_tasks[pid], pid) to_fail = list() for pid in staging_tasks: - for task,actionables in staging_tasks[pid]: + for task, actionables in staging_tasks[pid]: try: self._handle_task(task, actionables) self._advance_tasks([task], pid) From 7fe18f3049981f12ed963b786205f5e35657f1df Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Mon, 21 Aug 2023 22:56:17 -0400 Subject: [PATCH 102/171] cleaned Services counting (`startup_flag`): removed `_service_state_cb` in favor `_ctrl_service_up` --- src/radical/pilot/agent/agent_0.py | 50 ++----------------- tests/unit_tests/test_agent_0/test_agent_0.py | 22 ++++---- 2 files changed, 14 insertions(+), 58 deletions(-) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index cfad834395..f619f7fe13 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -206,21 +206,9 @@ def initialize(self): self.register_output(rps.TMGR_STAGING_OUTPUT_PENDING, rpc.PROXY_TASK_QUEUE) - # subscribe for control messages - ru.zmq.Subscriber(channel=rpc.CONTROL_PUBSUB, cb=self._control_cb, - url=self._reg['bridges.%s.addr_sub' % rpc.CONTROL_PUBSUB]) - - - if True: - time.sleep(1) - reg = ru.zmq.RegistryClient(url=self._reg._url) - pub = ru.zmq.Publisher('control_pubsub', reg['bridges.control_pubsub.addr_pub']) - - pub.put('control_pubsub', msg={'cmd': 'service_up', - 'uid': 'test.1'}) - - # make sure the message goes out - time.sleep(1) + # subscribe for control 
messages # FIXME: to be removed (duplication) + # ru.zmq.Subscriber(channel=rpc.CONTROL_PUBSUB, cb=self._control_cb, + # url=self._reg['bridges.%s.addr_sub' % rpc.CONTROL_PUBSUB]) # before we run any tasks, prepare a named_env `rp` for tasks which use # the pilot's own environment, such as raptors @@ -402,37 +390,6 @@ def _start_services(self): self._log.info('all agent services started') - # -------------------------------------------------------------------------- - # - def _service_state_cb(self, topic, msg): # pylint: disable=unused-argument - - cmd = msg['cmd'] - tasks = msg['arg'] - - if cmd != 'update': - return - - for service in ru.as_list(tasks): - - if service['uid'] not in self._service_uids_launched or \ - service['uid'] in self._service_uids_running: - continue - - self._log.debug('service state update %s: %s', - service['uid'], service['state']) - if service['state'] != rps.AGENT_EXECUTING: - continue - - self._service_uids_running.append(service['uid']) - self._log.debug('service %s started (%s / %s)', service['uid'], - len(self._service_uids_running), - len(self._service_uids_launched)) - - if len(self._service_uids_launched) == \ - len(self._service_uids_running): - self._services_setup.set() - - # -------------------------------------------------------------------------- # def _start_sub_agents(self): @@ -688,7 +645,6 @@ def _ctrl_rpc_req(self, msg): # def _ctrl_service_up(self, msg): - cmd = msg['cmd'] uid = msg['arg']['uid'] # This message signals that an agent service instance is up and running. diff --git a/tests/unit_tests/test_agent_0/test_agent_0.py b/tests/unit_tests/test_agent_0/test_agent_0.py index 153265247d..f9c8139007 100755 --- a/tests/unit_tests/test_agent_0/test_agent_0.py +++ b/tests/unit_tests/test_agent_0/test_agent_0.py @@ -264,28 +264,28 @@ def local_advance(things, publish, push): # -------------------------------------------------------------------------- # @mock.patch.object(Agent_0, '__init__', return_value=None) - def test_service_state_cb(self, mocked_init): + def test_ctrl_service_up(self, mocked_init): agent_0 = Agent_0(ru.Config(), self._session) agent_0._service_uids_launched = ['101', '102'] agent_0._service_uids_running = [] - agent_0._log = mock.Mock() + agent_0._pid = 'pilot_test.0000' + agent_0._log = mock.Mock() + agent_0._prof = mock.Mock() agent_0._services_setup = mt.Event() topic = 'test_topic' - msg = {'cmd': 'update', - 'arg': []} + msg = {'cmd': 'service_up', + 'arg': {}} - msg['arg'].append({'uid' : '101', - 'state': 'AGENT_EXECUTING'}) - agent_0._service_state_cb(topic, msg) + msg['arg']['uid'] = '101' + agent_0._control_cb(topic, msg) self.assertFalse(agent_0._services_setup.is_set()) - msg['arg'].append({'uid' : '102', - 'state': 'AGENT_EXECUTING'}) - agent_0._service_state_cb(topic, msg) + msg['arg']['uid'] = '102' + agent_0._control_cb(topic, msg) self.assertTrue(agent_0._services_setup.is_set()) @@ -297,7 +297,7 @@ def test_service_state_cb(self, mocked_init): tc.test_check_control_cb() tc.test_start_sub_agents() tc.test_start_services() - tc.test_service_state_cb() + tc.test_ctrl_service_up() # ------------------------------------------------------------------------------ From b0b7fbf4bf33167869e33e557e540bd5e3d3582a Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Tue, 22 Aug 2023 01:25:23 -0400 Subject: [PATCH 103/171] fixed channels --- src/radical/pilot/agent/agent_0.py | 3 +-- src/radical/pilot/agent/staging_output/default.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git 
a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index f619f7fe13..99a3e7d18b 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -196,10 +196,9 @@ def initialize(self): self.register_output(rps.AGENT_STAGING_INPUT_PENDING, rpc.AGENT_STAGING_INPUT_QUEUE) - # listen for completed tasks to foward to client + # listen for completed tasks to forward to client self.register_input(rps.TMGR_STAGING_OUTPUT_PENDING, rpc.AGENT_COLLECTING_QUEUE, - qname='default', cb=self._proxy_output_cb) # and register output diff --git a/src/radical/pilot/agent/staging_output/default.py b/src/radical/pilot/agent/staging_output/default.py index 5cdeaac5df..37e429cd94 100644 --- a/src/radical/pilot/agent/staging_output/default.py +++ b/src/radical/pilot/agent/staging_output/default.py @@ -360,7 +360,7 @@ def _handle_task_staging(self, task, actionables): # all agent staging is done -- pass on to tmgr output staging self.advance(task, rps.TMGR_STAGING_OUTPUT_PENDING, - publish=True, push=False) + publish=True, push=True) # ------------------------------------------------------------------------------ From 27cb7e979f5dbc3064b25d3c544a6b230779263d Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Tue, 22 Aug 2023 01:25:41 -0400 Subject: [PATCH 104/171] fixed staging --- src/radical/pilot/pmgr/launching/base.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/radical/pilot/pmgr/launching/base.py b/src/radical/pilot/pmgr/launching/base.py index ef7a4b0bd0..2fb918abba 100644 --- a/src/radical/pilot/pmgr/launching/base.py +++ b/src/radical/pilot/pmgr/launching/base.py @@ -424,8 +424,13 @@ def _start_pilot_bulk(self, resource, schema, pilots): for fname in ru.as_list(pilot['description'].get('input_staging')): base = os.path.basename(fname) # checking if input staging file exists + if fname.startswith('./'): + fname = fname.split('./', maxsplit=1)[1] + if not fname.startswith('/'): + fname = os.path.join(self._cfg.base, fname) if not os.path.exists(fname): - raise RuntimeError('input_staging file does not exists: %s for pilot %s' % fname, pid) + raise RuntimeError('input_staging file does not exists: ' + '%s for pilot %s' % (fname, pid)) ft_list.append({'src': fname, 'tgt': '%s/%s' % (pid, base), From 10bb9068c3c9044d9bf20c103793287bd50dbcad Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Tue, 22 Aug 2023 12:12:43 -0400 Subject: [PATCH 105/171] cleanup for `hb_term_cb` --- bin/radical-pilot-agent_n | 2 +- bin/radical-pilot-component | 2 +- src/radical/pilot/session.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/radical-pilot-agent_n b/bin/radical-pilot-agent_n index 48ea687b1c..ba3c8b6502 100755 --- a/bin/radical-pilot-agent_n +++ b/bin/radical-pilot-agent_n @@ -77,7 +77,7 @@ def wrapped_main(sid, reg_addr, uid, log, prof): def hb_term_cb(hb_uid): agent.stop() term.set() - return None + return False hb = ru.Heartbeat(uid=cfg.uid, timeout=hb_cfg.timeout, diff --git a/bin/radical-pilot-component b/bin/radical-pilot-component index 62a153238b..04979796d3 100755 --- a/bin/radical-pilot-component +++ b/bin/radical-pilot-component @@ -98,7 +98,7 @@ def wrapped_main(sid, reg_addr, uid, log, prof): def hb_term_cb(hb_uid): comp.stop() term.set() - return None + return False hb = ru.Heartbeat(uid=c_cfg.uid, timeout=hb_cfg.timeout, diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 668ff21960..6019cc561e 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ 
-569,7 +569,7 @@ def _hb_beat_cb(): # -------------------------------------- # called when some entity misses # heartbeats: `cfg.heartbeat.timeout` - def _hb_term_cb(): + def _hb_term_cb(hb_uid): if self._cmgr: self._cmgr.close() return False From 799868ed0e819157d52c156b938b9097518ceb1b Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Tue, 22 Aug 2023 14:49:51 -0400 Subject: [PATCH 106/171] updated branches --- .github/workflows/docs.yml | 4 ++-- requirements-docs.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index e06607757f..0d27ee4a1b 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -9,10 +9,10 @@ name: 'Test Jupyter notebooks' on: push: branches: - - feature/nodb_2 + - devel_nodb_2 pull_request: branches: - - feature/nodb_2 + - devel_nodb_2 # This allows a subsequently queued workflow run to interrupt previous runs concurrency: diff --git a/requirements-docs.txt b/requirements-docs.txt index f1beeafde5..0ce5a045d9 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -8,7 +8,7 @@ nbsphinx==0.8.12 python-dotenv[cli] sphinx_copybutton sphinx_rtd_theme>=0.5.1 -radical.pilot @ git+https://github.com/radical-cybertools/radical.pilot@feature/nodb_2 +radical.pilot @ git+https://github.com/radical-cybertools/radical.pilot@devel_nodb_2 myst_parser docutils==0.17.1 From 6de14d2dfc5395a62eff1ef255f2069424e124c9 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Tue, 22 Aug 2023 21:10:10 -0400 Subject: [PATCH 107/171] cleaned and reworked CI configs and requirements --- .github/workflows/ci.yml | 162 ++++++++++---------------- .github/workflows/docs.yml | 9 +- .github/workflows/run-rp-notebook.yml | 18 ++- requirements-ci.txt | 25 ++++ requirements-devel | 7 -- requirements-docs-ci.txt | 13 +++ requirements-docs.txt | 3 +- requirements-tests.txt | 12 -- 8 files changed, 117 insertions(+), 132 deletions(-) create mode 100644 requirements-ci.txt delete mode 100644 requirements-devel create mode 100644 requirements-docs-ci.txt delete mode 100644 requirements-tests.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bb86ae537f..457e298959 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,6 +8,7 @@ on: branches: [ devel ] jobs: + linting: runs-on: ubuntu-latest steps: @@ -22,10 +23,7 @@ jobs: python -m venv testenv . testenv/bin/activate python -m pip install --upgrade pip setuptools wheel - python -m pip install git+https://github.com/radical-cybertools/radical.utils.git@devel - python -m pip install git+https://github.com/radical-cybertools/radical.saga.git@devel - python -m pip install --upgrade . - python -m pip install -r requirements-tests.txt + python -m pip install -r requirements-ci.txt - name: Lint with flake8 and pylint run: | . 
testenv/bin/activate @@ -41,22 +39,8 @@ jobs: flake8 $FILTERED pylint $FILTERED - notebook_integration_test: - strategy: - matrix: - python-version: [ '3.7', '3.8', '3.9', '3.10', '3.11' ] - uses: ./.github/workflows/run-rp-notebook.yml - with: - python-version: ${{ matrix.python-version }} - notebook-name: 'getting_started.ipynb' - tests: runs-on: ${{ matrix.os }} - services: - mongodb: - image: mongo - ports: - - 27017/tcp strategy: matrix: os: [ ubuntu-latest ] @@ -65,53 +49,40 @@ jobs: - os: ubuntu-20.04 python-version: '3.6' steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m venv testenv - . testenv/bin/activate - python -m pip install --upgrade pip setuptools wheel - python -m pip install git+https://github.com/radical-cybertools/radical.utils.git@devel - python -m pip install git+https://github.com/radical-cybertools/radical.saga.git@devel - python -m pip install --upgrade . - python -m pip install -r requirements-tests.txt - - name: Unit tests - env: - RADICAL_PILOT_DBURL: mongodb://localhost:${{ job.services.mongodb.ports[27017] }}/test - run: | - . testenv/bin/activate - coverage run --source=radical.pilot -m pytest -ra -vvv --showlocals tests/unit_tests/ tests/component_tests/ - coverage xml - - uses: codecov/codecov-action@v3 - if: ${{ matrix.python-version == '3.7' }} - with: - files: ./coverage.xml - # - name: Integration test - # env: - # MONGODB_HOST: localhost - # MONGODB_PORT: ${{ job.services.mongodb.ports[27017] }} - # RADICAL_PILOT_DBURL: mongodb://localhost:${{ job.services.mongodb.ports[27017] }}/test - # TARGET_PATH: 'docs/source/getting_started.ipynb' - # run: | - # . testenv/bin/activate - # python -m pip install -r requirements-docs.txt - # python -m pip install jupyter - # jupyter nbconvert --clear-output --inplace $TARGET_PATH - # jupyter nbconvert --to notebook --execute --inplace $TARGET_PATH + - uses: actions/checkout@v3 + with: + fetch-depth: 2 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m venv testenv + . testenv/bin/activate + python -m pip install --upgrade pip setuptools wheel + python -m pip install -r requirements-ci.txt + - name: Unit tests + run: | + . testenv/bin/activate + coverage run --source=radical.pilot -m pytest -ra -vvv --showlocals tests/unit_tests/ tests/component_tests/ + coverage xml + - uses: codecov/codecov-action@v3 + if: ${{ matrix.python-version == '3.7' }} + with: + files: ./coverage.xml + notebook_integration_test: + strategy: + matrix: + python-version: [ '3.7', '3.8', '3.9', '3.10', '3.11' ] + uses: .github/workflows/run-rp-notebook.yml + with: + python-version: ${{ matrix.python-version }} + requirements-file: 'requirements-docs-ci.txt' + notebook-name: 'getting_started.ipynb' analytics: runs-on: ${{ matrix.os }} - services: - mongodb: - image: mongo - ports: - - 27017/tcp strategy: matrix: os: [ ubuntu-latest ] @@ -120,41 +91,36 @@ jobs: - os: ubuntu-20.04 python-version: '3.6' steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - sudo apt update - sudo apt install -y texlive cm-super - sudo apt install -y texlive-fonts-extra texlive-extra-utils dvipng - sudo apt install -y texlive-fonts-recommended texlive-latex-extra - python -m venv testenv - . 
testenv/bin/activate - python -m pip install --upgrade pip setuptools wheel - python -m pip install git+https://github.com/radical-cybertools/radical.utils.git@devel - python -m pip install git+https://github.com/radical-cybertools/radical.saga.git@devel - python -m pip install --upgrade . - python -m pip install git+https://github.com/radical-cybertools/radical.analytics.git@devel - - name: analyze example session - timeout-minutes: 5 - env: - RADICAL_PILOT_DBURL: mongodb://localhost:${{ job.services.mongodb.ports[27017] }}/test - run: | - . testenv/bin/activate - ./examples/00_getting_started.py local.localhost - SID=$(ls -rt | grep rp.session) - echo "$SID: $SID" - radical-analytics-inspect "$SID" - mkdir artifacts - ls -la - cp -R *.png *.stats artifacts - - name: upload artifacts - uses: actions/upload-artifact@v3 - with: - name: artifacts - path: artifacts + - uses: actions/checkout@v3 + with: + fetch-depth: 2 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + sudo apt update + sudo apt install -y texlive cm-super + sudo apt install -y texlive-fonts-extra texlive-extra-utils dvipng + sudo apt install -y texlive-fonts-recommended texlive-latex-extra + python -m venv testenv + . testenv/bin/activate + python -m pip install --upgrade pip setuptools wheel + python -m pip install -r requirements-ci.txt + - name: analyze example session + timeout-minutes: 5 + run: | + . testenv/bin/activate + ./examples/00_getting_started.py local.localhost + SID=$(ls -rt | grep rp.session) + echo "$SID: $SID" + radical-analytics-inspect "$SID" + mkdir artifacts + ls -la + cp -R *.png *.stats artifacts + - name: upload artifacts + uses: actions/upload-artifact@v3 + with: + name: artifacts + path: artifacts diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 0d27ee4a1b..9f90d1b0b9 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -8,11 +8,9 @@ name: 'Test Jupyter notebooks' on: push: - branches: - - devel_nodb_2 + branches: [ devel ] pull_request: - branches: - - devel_nodb_2 + branches: [ devel ] # This allows a subsequently queued workflow run to interrupt previous runs concurrency: @@ -24,6 +22,7 @@ jobs: uses: ./.github/workflows/run-rp-notebook.yml with: python-version: 3.7 + requirements-file: 'requirements-docs-ci.txt' notebook-name: 'getting_started.ipynb' test-tutorials: @@ -42,5 +41,7 @@ jobs: uses: ./.github/workflows/run-rp-notebook.yml with: python-version: 3.7 + requirements-file: 'requirements-docs-ci.txt' notebook-name: ${{ matrix.tutorial }} notebook-path: 'tutorials' + diff --git a/.github/workflows/run-rp-notebook.yml b/.github/workflows/run-rp-notebook.yml index de5b9a2251..35567a4da9 100644 --- a/.github/workflows/run-rp-notebook.yml +++ b/.github/workflows/run-rp-notebook.yml @@ -7,6 +7,11 @@ on: description: 'Python version for running the Jupyter notebook' required: true type: string + requirements-file: + description: 'File with dependencies' + required: false + default: 'requirements-docs.txt' + type: string notebook-name: description: 'File name of the Jupyter notebook' required: true @@ -25,11 +30,6 @@ on: jobs: tests: runs-on: ubuntu-latest - services: - mongodb: - image: mongo - ports: - - 27017/tcp # will assign a random free host port steps: - name: Checkout repository uses: actions/checkout@v3 @@ -43,16 +43,14 @@ jobs: run: | python -m venv testenv . 
testenv/bin/activate - python -m pip install --upgrade pip - python -m pip install -r requirements-docs.txt + python -m pip install --upgrade pip setuptools wheel + python -m pip install -r ${{ inputs.requirements-file }} python -m pip install jupyter - name: Run Jupyter Notebook env: - MONGODB_HOST: localhost - MONGODB_PORT: ${{ job.services.mongodb.ports[27017] }} - RADICAL_PILOT_DBURL: mongodb://localhost:${{ job.services.mongodb.ports[27017] }}/test TARGET_PATH: ${{ format('{0}/{1}/{2}', inputs.documentation-path, inputs.notebook-path, inputs.notebook-name) }} run: | . testenv/bin/activate jupyter nbconvert --clear-output --inplace $TARGET_PATH jupyter nbconvert --to notebook --execute --inplace $TARGET_PATH + diff --git a/requirements-ci.txt b/requirements-ci.txt new file mode 100644 index 0000000000..752abc7003 --- /dev/null +++ b/requirements-ci.txt @@ -0,0 +1,25 @@ + +# provided branches for RCT components can be edited, +# if there is a dependency with non-devel branch + +# default RCT branch for CI runs is "devel" + +radical.utils @ git+https://github.com/radical-cybertools/radical.utils@devel_nodb_2 +radical.saga @ git+https://github.com/radical-cybertools/radical.saga@devel +radical.gtod @ git+https://github.com/radical-cybertools/radical.gtod@devel +radical.analytics @ git+https://github.com/radical-cybertools/radical.analytics.git@devel + +-e . + +setproctitle +dill + +pytest +pytest-timeout +pylint +flake8 +flake8-per-file-ignores +coverage +psutil +pudb + diff --git a/requirements-devel b/requirements-devel deleted file mode 100644 index c68c03d58d..0000000000 --- a/requirements-devel +++ /dev/null @@ -1,7 +0,0 @@ - --r requirements.txt --r requirements-docs.txt --r requirements-tests.txt - -jupyter - diff --git a/requirements-docs-ci.txt b/requirements-docs-ci.txt new file mode 100644 index 0000000000..572291160b --- /dev/null +++ b/requirements-docs-ci.txt @@ -0,0 +1,13 @@ + +-r requirements-ci.txt + +sphinx==5.3.0 +ipython +ipykernel +nbsphinx==0.8.12 +python-dotenv[cli] +sphinx_copybutton +sphinx_rtd_theme>=0.5.1 +myst_parser +docutils==0.17.1 + diff --git a/requirements-docs.txt b/requirements-docs.txt index 0ce5a045d9..24d33e983b 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -1,6 +1,8 @@ -r requirements.txt +radical.pilot>=1.36 + sphinx==5.3.0 ipython ipykernel @@ -8,7 +10,6 @@ nbsphinx==0.8.12 python-dotenv[cli] sphinx_copybutton sphinx_rtd_theme>=0.5.1 -radical.pilot @ git+https://github.com/radical-cybertools/radical.pilot@devel_nodb_2 myst_parser docutils==0.17.1 diff --git a/requirements-tests.txt b/requirements-tests.txt deleted file mode 100644 index 109932dafe..0000000000 --- a/requirements-tests.txt +++ /dev/null @@ -1,12 +0,0 @@ - --r requirements.txt - -pytest -pytest-timeout -pylint -flake8 -flake8-per-file-ignores -coverage -psutil -pudb - From 45c047af8276bbd08428920757eae8bc7c31de68 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Tue, 22 Aug 2023 21:14:45 -0400 Subject: [PATCH 108/171] fix --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 457e298959..8ade818cf6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -75,7 +75,7 @@ jobs: strategy: matrix: python-version: [ '3.7', '3.8', '3.9', '3.10', '3.11' ] - uses: .github/workflows/run-rp-notebook.yml + uses: ./.github/workflows/run-rp-notebook.yml with: python-version: ${{ matrix.python-version }} requirements-file: 'requirements-docs-ci.txt' From 
9eda4cc528417aec138323a91df7086dc553aa85 Mon Sep 17 00:00:00 2001
From: Mikhail Titov
Date: Wed, 23 Aug 2023 11:29:46 -0400
Subject: [PATCH 109/171] fixed RP installation within `requirements-ci.txt`

---
 requirements-ci.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements-ci.txt b/requirements-ci.txt
index 752abc7003..bf123663fc 100644
--- a/requirements-ci.txt
+++ b/requirements-ci.txt
@@ -9,7 +9,8 @@ radical.saga @ git+https://github.com/radical-cybertools/radical.saga@devel
 radical.gtod @ git+https://github.com/radical-cybertools/radical.gtod@devel
 radical.analytics @ git+https://github.com/radical-cybertools/radical.analytics.git@devel
 
--e .
+# RP from the current branch
+.
 
 setproctitle
 dill

From 01386893059c26aa5a9a4f330906a4f2f1174da0 Mon Sep 17 00:00:00 2001
From: Matteo Turilli
Date: Thu, 24 Aug 2023 00:09:58 -0400
Subject: [PATCH 110/171] Address comment

---
 docs/source/images/architecture.png | Bin 86515 -> 91863 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 docs/source/images/architecture.png

diff --git a/docs/source/images/architecture.png b/docs/source/images/architecture.png
old mode 100755
new mode 100644
index 1afe03efb054e8a67d88df4d092598ab279b5315..30ebe0446a75d9ce82d79b746d5e2c64e6479391
GIT binary patch
literal 91863
[base85-encoded PNG payload elided: the binary image data is unreadable and
 truncated in this copy of the patch series]
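
Stepping back to the `HeartbeatMessage(uid)` -> `HeartbeatMessage(uid=uid)` fix of commit 097: `radical.utils` message types follow its `TypedDict` convention, where the first positional argument is reserved for a mapping of field values, so a bare string lands in the wrong slot. A minimal sketch of that convention (the class body below is an illustrative stand-in, not the real implementation):

    class HeartbeatMessage:
        # Stand-in for the ru.TypedDict-based message class: the first
        # positional argument is a mapping of fields, so scalar
        # attributes must be passed by keyword.
        def __init__(self, from_dict=None, *, uid=None):
            self.uid = uid
            if from_dict is not None:
                self.uid = dict(from_dict).get('uid', self.uid)

    assert HeartbeatMessage(uid='agent.0000').uid == 'agent.0000'   # fixed call
    assert HeartbeatMessage({'uid': 'agent.0000'}).uid == 'agent.0000'
    # HeartbeatMessage('agent.0000') would feed the string into the mapping
    # slot and fail, which is the bug the commit removes.
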
zSBd}ERe;Pqrecy)2!4&Ijv9~4wv_;`@zJW=ch-#m^(s(OOtbTsEte%{=F8$m$*=FY zb^Ul(^b@l8mtgKgg6aEPFe9IJWij$0wdjd^Lt4yu^~D%`bguTnY8n(q;uWw1MG55g z-wJSe6`pzjT@H$Df@MZbTYjeKD|mmqfcLk* zZMo)ftu7&{p#rtX|3`#-ptE>5;t)wPf*(nE|NpqE%66(%#YjwGTlUZ5Fw5H+HD*1r zAGhttiu}=CBhD=l3u9;>>J~}z{N+D25Fjl{w0mr=E)5eLfX*Mweu%`sx(^uYcs_`g zH{R>S|GsLxEX${8yw6aK=U-7i^o5vL^bL=-L4Jx8C z^EDx5-ypH8hJW_qaa$hy#T1ys2V~UWYTMqDOg{(;j?&cD9Ma;#W7TIMvRnO!VBXF14twaGOmZgg;y@@ z<}W4_uHC@%CLAH&1ZwYZU#$dL*wN?Kg2)S3cE2k?Hj*ae;5iQ}n~RW*3{^l;(#;DW zB}=yOB#tj2esPHQ6FOi{w_fW7`KS^th68+eOe0G|VPhB1CrAz*X#)X74ZP!funQ$O z={R?gB0+Ksc0wscNSPw?^M*_s=dJ@C@Bg|M!WJMZMz3|U z{}YdTEancL4bhshOkiQUGQdB^4ag1sZPs&;cqVEX40d7#ta_kN|6>QI8JmFax9te<1^!2VfgB5Svv}t?+sKg%k;% zKf)_vCS9L1M0{gDgD2&@Q5hucZ;P=}l!dC8lCAOEAVoNbJ}h=S40^LWqOUZR;SXdK z^sUX(51Hxm1WX`>)5M z4H9Mzb#pL+xW#C^GhYh-f5`sQ2B6S`Cleg51OAQ_SX+z5+%5j7f}wZ8m=W}()&~9{ z{a>yfP-8!oe?t*2dH1hXa~k(emhI-FLbZ~3@JBa2BrVe-{;Lq5U7u2K4EQVvy&Qp=3hKmoKA4ps{N_m4uAI%8|RdGY% znpzJRJ{8Xf=8Y6m=c8Ab*f}I%7)BsVwZjvRCMAxsF$xf%dMqdb5m^%hi+Dx@cv{Wv zCAq`Yh$MZsp8IaQ13aowW#AaoBQW(|^u``Uw9m;3#MQ2)MAm?rfIT>NL8s71<2HO# z98bw?aaYdouGC*ky)3o?II18HL00pZMXSw1y~6tD%VvY2!`H9B{Ry}LUhCsG ztW2O$hs^Sb=V$4C5=upaVOEXxQy=Z7kmtI1c~**SdP8WyrUX>Pr2semy=?B2(jCd_ zP2V>_$+solbouelnEnW&0ph6b2W{_zh&;9nCX#4&j34qZ)G*H8j@kq--zDiy1^B*6 z5{t2_6hPXgy|ir++)O4Zo{RGxBw-$FZ!v)|&h>4#)eT$@#6((4ojG1F)wu=72vR}_ z8I`SK|f*#xlgo<#N+n?^?A8fN2-*J7Zn61YDNfuXI%$V^Z5|D0yPzu+jQv z@@n>2q*ifHU^XCNbgk(E&O5;NsXY9m&qS)uK=G;act7PXh7amAP6zZQZi@OWX;<%B zlBbbgWQSs9)+O%;#^N0DrmMrm9}P<(PP8f z@*ES<$@StU``;GC07t*d^61Wjpl+GX44s>XB<}bp;D~x@8905h)JNy@fVX5n!3p7G)% zdaSqMC5F#B#`ifK0c7%md%KBvM4V@Vf%k*Ggd^aQ0NPZFhY}#WFhC7Xr9}ACSL^-m z5QQUCC6WX7;x_;IZ6!rkI@f9X<5JoB#$5Ik8_sTS}}}}<9RL6ZIYPgOCY6d1rvaytF_i{ z_WKj_YYL(8fm84JV~d&91V^@rIekMv%gN0TQhG2zAQYUP!I{ULp3YZf|%Z zyX1C(dK9@Melkh4b(mV(#{*3A)j}yE%9;T*nA3UV?N9CiI7Kn?e$qZ51O$qN*2800 zNe#}f;s`bb`h_-JiP0XnlOW!nPZ9)O9d7Z8^DtPQa$H~ zPw=5W!Q2G-p~4IhBsSksK5q}#s30vJ`MH03M9@Zreq(h_{A{{>RePhj$EL(-c@;N? 
zz(~I)f<|T0EmeDTLODOm83%K{i4HU-Zk+>BQDU>zD?CO-PnTP$9t)=V&4;|kdmd&Bp(JI@@-NqKct=#VJAg*seyX9R|1%|Si62l| zQVtS{g?f;Wm>nCR&LCl8oJ%y6&?e}Df!_cJqghP7;*!LMPwcv#5>}GN{YZ7o;axo| z45Fc=a@BN(3a*%!ST}w{J8kZMm{P^;ta^*%Nrq#(=r7f0UKk30Sn994w+2SaMy|C1 z7}WsE-41Yjo;Lt@bQ`H{%8g_$+L|u`lXE@reZv-rOmghVHhJ?&3wMd?WOT}RSaxXl zmhYn7{KGvmH6dsu>v-{gEZmmF*ZFz*L9MQcN02ex}HNvd*#0nC_Qp^GGY-6>5v29VWj=@1NBBsIGJk|(<1;jxP%AnCcY|^YpcY(U?cMX%mFVcZ*K!b zlO;s=C&H^c9((0e`bpDmOm3nnC&F9?_+B)@CizB@lbRty|2CmZw$x?;Jh?V_*?|CM zwq5vw{pQ#(-~|d`7BKD9pXI^Cj&^knT~Xma;j`7v3YECP%FT&;$F5IZ*AEgLJ5Q*G zuQ_kl@Wf5>1OSz?eA~SDvOVSugYX;DpS@hX=3%SH?|#YlgKHOv(v1EgmcM<}5K?uL5N&tS z9H2{HFKF^55cawyq-Oe`@&=0@eti3+QF~&M$=F{cSYUCk}u=ra0 zrn8)DzGZjgrk4W(6Tq0dna1UniAK%?BSVmwjK{EDy@Ahd9|Uo|_iK{8U0p$&{-ARA z20~tiljvE}W&}#hqUMVCD5DPf-a{##9HM7Fp`^MQ|1czG0?y+}a-mERhq*{m4K{>(L_{wH~e58EU+VZv^}&rfLh{dAu0JYSJ67!O<`ZsMx$3Z&JO-V z7I59)F5y{*S(Oj*{yfR{+STK7)a* ztL;{Y1HQ(RP>2TuaJM?4(bA$;xku9stj8}uUGOPsRC{p1lHfP~sf(H|y}w^M?owkQ zWqBUuJJlG8*K<|!bWqriP#p5l%|5+1KKt#9E6vRxa-&8Z?8o&q1W9$+2#;DXySjo# zR}fGx-4$*-hiNJuWPye=Mv+ox^vv;mS0-|c0S|X^ahP$Y44tS)=oh$RTzlKwtO0uY zz<)c!>+3)EJQ*M^Hak;M=UYE1rxWrgSguWoG*o4|ldyn1dWT3YDICDysqZ(L9aujk z-esUqb#SORl2k8*=XfDskST!iS0V7D%{uPLii`%ijA^hk{QDq6o>Az9D`x4EQf`X4 z*Md3G>wD#V4+&DU`5xb_nCw+_jC`UQ`%bmOyk*O-S^`JPxkvOXF0sEtp z8#eM}oVr38)*P#Kd!yt21xif2yi{({BX)uLfl0eD|0llcbcGJL%WR91X=St&udB2d z8&YY1?Xznpq%E6i9_bAr^<^0X)25tZmX{Tz$jB=Bl*Zt~l)BR;g)_91wyhSw>Xu?; zv}*4zQxFIxfOH!GQ%r82{l0`W?Rku-Z*BfGfj#+9A*XKTjTr+=1mE2g=u`WS<4Q}X zK&lRA$F0fF@84q}iYO>mkK8$^9_*#R0h*9=^ zX7UeCjY8a9vr`XF%}fbYNL))tFy_Ikg=SrHZUN%SSCOy)^mLtYS6+A=$}$Y(lxj1# z0bEwXuXIxnUsr~cgDCWMQvz^l2e+Ccq|c{5NAs|C0Fnq%)A3i?YL|OLI^;qyK3gEx z=~?1N*w26J6ki3m`!}jnP0X==^eej0<4YVOc|7f2OhU|Bg}I`xK;C)0vfXlj=Uto?OZlw0>dj@yWeC`d>eGzg+}3nJ3prJ{67$AAF>BA}$y(B0i2 zEl4vkI_`I**^?82hk8`PW?|ZMk*4isxYu$T4_cwyt6y*C;XID_^ zV~Q_(PW-v2&G8ml(A)AS=iN#G&VCQX86{mJ`&jetR${}at{c66#$7bx6n_SH=pNoA z=m)*D>wY)F{akIOxMix3MD4pXRF5qw)TY6A`(;Y~eYX4T_oZ0DN2!*h?yy(QoEXH= z6G2lE%K;MJTbGvWVC0O_pdG@JAwzVsiiN0e&359Yc*-2(Pa{}BqogDbB1@@WyR)n# zEt$nDV}7vO05;GA7O4;tq!1uy_mV%-+LAZme!rhCch2XC{Y%2h{Z)c|NeF(5Hy`7e zM-eZd)5hMeCmdp{#)B&B7Vpf|q8mm8&(XwseGazBJY|TqlV1-zZY2Bg>u=p@eDUGr zRybC&<)zD~gTvj+05qyENOd{K^x;bRsq-H3<{*Q>w|e~EQn;D|&AT;g^;lB5AIPe- z&VLkZxj22tf{A}jz%+&Iea5GmEaB*|tpluPw(>SVcj37Dk!oTyytyt+Hrvn&o1p!n za205W^%f~Fw^adAO921JGc)GJEm7`yXg-g=35GX+aKt2o>Ixyeu?j+xQodjfSk+(A zfKgf}=F0Y0D3zz!-~JGA1vKMX$K^IE^!J>tp?S<)`qd(C3Hp}3%w?YGG4t)YI>N3V zs#ws>vmH#VOJOc`eUK_>-}%<2Bo8IT6OY-8OP`f;Zd%25PCwQ7%J*wO0iK-RDkIv> z0-ql;vSWK}KFFnf@U_I&AqH>mm3NrJVc}~`yEdfhr$=3^uPW}94Pr;mR7s@Wu*F2WIB0Uh{gRd@&N+7>Q~OX z2oIdJPn$X74WQ|l4~HUq*}S94HFQj%Ka-IlP_7s1*@*S^$3ucF_R~t2u=>+be%I)c zVtm^dVv*xWV=_)tp4cdireU~BDL8H5A>U(ClqV*guZn90!GZ9nU{xc-l`R+2?w8YP z)omovO~KW)(F}Ayp>^K0$?>sG1SysSAejshAU>{#!>uo9&D6SzV{U&qrZK{i9TD#Y+8QxK zw_Vo3_q5)yW)tR8}bQ0?4)`ujxKHNm<62nUgUv4I0jF$LE@oPti?K_KBc% zXvW!qcROD>_J!u%evB1pmn!X6BWtILTS*veN(ZxizfF^A6{H( zaJLhEewPbLW_i=-kY<%mf$Zdb%;`fFR+m10+#4+yZ6ZhLzih2G#yDOQNgVsFz*J)x zcGFX2RlJL~O=zv0jz^QucknLva%@CyOI+tR!CN;%nWN9RRVUoV@$!Yh z0;mPD_M~^>;znG5MfvE%$C}!2#WKVLF{3^_R4O@Yj{PScK|Jz;Qqxp-`~ANCUD#JT z*^w_zz-(z?VUzIL4js_Fb1v-hq|n!w`&wDnOtp7=7&O|@PzXj&RF3Q5+ivStbbv#M zo-93kwZ91hc)shGu*okx`m}RqS^dLyx#71<)`x81TOpkB1)*2qn;zr$pK2@n>rL7@ zkb2y9^ zNV1;R>DH2(Q9n=r*nM#DQ(Yt@DnGMsG^|pv)~U;@snU6+3i}*&QSI0N^iV{!Aip{L z#0Oim`s}OfJL7fd{HN*<#cFTp-ub#|u)Gn-Hgn|(!hhca>cTvIch@29G9hPE)%K-2 zL7Iquxp>aTpSz5Ss@JQ*%B`*5n(7CY;tH&)=OXoT7ISv|D6adSgkAl3(m zS{U|$z`};4(D>)Z$L{!en*Dyx3R?EgijmCgDuC2yt;W&(ohBz^qQZ38Nx@uzgH`OH$hs=jN05SMG6;QxdCX 
z;NFZkEvh72p%}LNOuJCQN=dSey^N!XT6z2i?R!e|HX*XZw*iZna6p6!brZ$tntP9Y zu&wdNaU(s@qMT6CEc~g;s~K(h+~w+5i?d-i&eAJmKI@G{0p)_OS8G;l@uUb*SX8BEbjb16&(|!^l&^n(<~3&DwQq?%rTspP18`jJykSt_&q5VLgFqD-k^o68d92^WpMI$y%MITyg8Z2j zD2p{1?H>D`ARpD+@-Ft^FF2ilMCo)~&64e-`=?H4&Z=DS+z*6*p@7dBhRgrRKHDr} zxA5L=^7;d}Jzw`*DxfRr{@6c~m!ksWMqcq|2r1h`j{d{ywYRpodB>O@y-nUP$^B#9 zPm?Fr%?LA{PcWULw0wE`&4}9H7kr;3qUp{=dBG(y1gQB|U3eV&p}kUeNJ;d;ear679AUAJPoze+BcqT1X-pYvc%>70@3I%7SZ0m8+jg z%J=ea6m(FbHx_i<3-pQVrT-j^)Zx%tfIj#B8=)8Rq!)(@FE@LFZhw|7Y5bqHGqxm` zp0y+tzsiE-e;KKBSjA6DMM0>YP>9}7mi5~2zW-AleQ%j+pd@8S8Mcl}$bVTWR4W66 z{dm)&`)SR<14kw4p8ndTNf^|7ND1oJlSf6}pBU@+aX%0pFOk0Y6C3D5-oInV}tzCPu=+3cB({~rT6=E_1+pvBfgi+v~k`DyO2 zB6^zpYLCoD(bb~2#SN*9RA-u+=1a1*rNTRWr&W_fIs}$*oEJf&fuK3mG<#QeryMFbfJ&%!ImxP7ZD9K9uSM*!QDGOo1?s%pDFpn3g^gYwiRoD1U5b<2r41;m$3LTmXS z1PmjYJ)GZBm1b+7LlZ;4Dg5L{4;?u4WL-r!_27)nm`9~x@nA~?GT9%E`ScQ1QHka_ z^j$mA-M(mUNS=$sb?yZAp_qsjk!?1P5-cO2+EzB=^r-4AQF5+9PR#V5GOv{ZcD zn5(5DXB^wY9Z|xWe9QY{lk@jKByBnB8EfYyD>E)5#RKb2g3=Zi14B$g8n>>uoQ2vZ zCJdh5QTx@upe)?YIcMcZNn3Qq3Vc*r_3=+B&Q65>>fa!EDOStERa~v2p7@Pkc;``& zr}zyU!)(oJ8tLhPMr($N($@Lj^d+q~WK6oWZ|}er7q+hUkJfk052=#zrm+>tE?7(G z0UdJxUvvn3>u!B_r@AY?!8hp)Jm;CYOb4Gzl?tm7PbeF_hh~zOC}UZ#9bDV@?T8cV zNlW79Ru6X6o|#0mY3H*Q;mSV%F7f|WNyLY-?#?Ck7MMpJ(raJd=XfkU(l+~?Jf6RW z|2q%Rr4<_ioYf+pW!8@St~*YNbZG8MvUr#d+;kj#7UwJ#4EPZFoexI=*Z<>Qs+T4) zFu!rQ&?k+0+PH#ju{HVXU~L|n9VRP0eMi@t*@!RsRQog3MP5Wy)2;Z)`QRHbtzYe= z(+y5fotJK{Ah*BL)B}W((n0ULC=zu2sH>&p^V6I^+(v>0VuDOi#+3g1Uiniux|>*j z^%LRQJB~Q9PpaLdF+|Dm_Krl#R0vh+3t&fmiRNW&r}mu=`c%5tRc-EjZMnPW+jl|) z?d1z_QFX6~D`?RU8b+p+o8LDK6^a%YW>?HtSXs8}+f##Lu(s}gf%{kEpwjBz zJE=3jTB+hB9_~KiZ(p6L54rJ&8TLLhCseRKg`Y7NjTQDg#4)mt;mGmrC6lQ@MCn2% z7f?&Z6Vv1VPfNck0eyJ*HS!Tk;O$n8v!Md&gFssKM9lx6=0?9`=ltMiURhXD^KPQU ztSGCTyJ2+%*-Cq)nG#>MrZyMU?J3k)KEo;%s4Qhx4D3ZQ{p|jPM?!GW{m^lf?&X%e z?$IWB^FM78-X4D7yDeNJI8ZDs71zw~C=-%3nG7|OA<4kSkG~}H?BMyAUh2y_kyN_b zSFj@k%YaM>D~Kv+t)zTUPrpPH?C`z9Go;*PmxRp5|FT`)s=47 zrm|NT`DHbnCV4{-1vcbH9GOt4byrvMDwWh02bmai$_lKC(9^jN7E4y~dDEZsxfjC0&S5R;_JD2#>%(uFzY0k&+}Gsqd!O zFkNEED6vCrV9VqitzW4vzDDO=BV2CdH&@C%@OuqWlC4fMf*YmUTouS!~6bv_R@GnsdYpRg|(etXgWJ3*UaEEI*7IW>>eMA6cz8hr?3NLmdNcN^Sn@& zJL`X^tJSN6F=#rYf_1lTkZ22^{D2PdbtR`?NVN_Gn{Fe!n zps~o7J^=^W^L{VUGgS}hTODnf>Kso6$HH3dzVGgs|M%UU^aQkqID%uD%$W`sx*IUd zfwFHSI)rl$b+yY8BOk}PxZoI)MB#-#dTwaXw_5L*EvR*v^tG;Q6s=SkEC)3qy&Pqd zU>tOcn6La}mA4%H9kQ292wts!+KE{Igz>hfxW_t_oPlJ5O!D#n!6Y-x?ThhSG{{Vy zSy-OFwQ!hVGY7JPK$6*R&2e_1{ew8Y|l~PwIRTbCmHe zkToZ#sZgyjK(p6GH`ri}#m39nc>&psYfA`pTHrd!diTLs_<%+L)7*t1Z`Sxv5bC4*sQRA2gh07I=7} zSJRiT#KQ2Z^@0D0BT4#;V9%mbXxm_mPlmVui14p0u?BprvZEi?qiTjNN`O4!dmtUZ zjSPg__v^rs1x6#%)2|L2l=46k*SQ*(Y^H4}-+ly(j(@M?2!Z$d#S03)1AAEv>JAWHYvLCt9ED6}H{%@9@3FIj!X`0f!=f20LaS=5a^Qg;~dJ=?}T$ z^ma1A#*`HX74`)$oi7IS;i~3tr6SlRO)!^wiJ=(rpN+^$*b~W7GiG;939Ke`WVi%$ zngCn7UCjBad4l;s{3lJ@XgR;;ZS6VGWN-c=v>EDYKa_R>UC2kB z#I;AcQlM|M;V+kQ|Nds0)js9#A>HJ>+iv)yE?kO7&Cgtan@(qlV^cik5R=B!clc{F zh1+P@(H~ljLpLr!-w<4pfg-z~4eAHcmFO2C5iNbT=VxfxnxEb#*1PjD@-7XyAkL5G zb#u+PNLKD?(^ukW^q$`#HZ&yR>D;m58i6g5qZ^z@oj13#ySw|~=?MW!%Fe36Rc`Mr z`ZX05sc)qgOobN;%OA$Mg4^IaJBF>>E>n}0J;R&3!76k)I?h$pg~z*rQ2VwM&B5M& zI-DFlHK#~*hHei3sh$Kww$Pd!qB?~V{OiVD( zV(up&X2Q1X>;w6D!Gnn8hZp1!w4WP;lyqKxi%oHp7X<%0V}Nojv!E{cV_ND^H8v+2 z*S#s~4(K|+P3V=I*mE?71xZq!R!z@sxVA|?~l_G=X`)})yiMqVqC0L!M z3Q;#+1=F@6Ssd)ZgDJ8fh+|S@-}}<1R-9-S`L*J`bsQhOi-&vrS2|GMSGoLxoBwHXXIAH4g24)1)mFP>6;XqWdN zqbVwFHl$qEn-%}h(VuJA#jriW2ftK7iC(&wqh+2HDu?*Oy|V>ehV#L-e?l;bAFx-Z zQsR>Y4=LX6Hqk1a21mE7#SnjLJnj2FdL+c-hHE_?V9(^!oF_pm>k^DWq@5X%M6MWg zjh%n+vPaj|p)DVMPISUI&UK`9&?JMHAHN3$>pz-co(|8%ebdGWdVTmNxF_<(_&YVz 
zSh}&5WZkN{0cVf6)d%v`=K7U|s(x^9S(Iz%e;dYB)n6-LJLW%HIsSWC-KGjy4(@2h z=zkrSyS)Nt^IWEJ+|r3PtdK`!M+`izV~;Z!-{)M~TmTE{; zf-8nA{=pV)tSfp^5?-lFM`Cehb6&sEv_gMFs$zX^?$Wql$+=v%ytqpqMM=ZN*r=uAN0 ztY_BRzv$bUisC(I1NeAs@Z~rT>FcL2xq#Axt1>?jQ*YeLtT+Mq@M7svZ*ZvZtC(y- zr`mjx9WKl0#dw49E@ze_mAPH;U8&E4aW+`#ncdOuyK{9-{JcT_B3q-ZPBau{^G}Yf zl8EGYiGv4)>~k8^zJLumyDT_05bHwb+S%T9cl8rFkt1Y)h&8_R{S^3f*=u%H&Fd0e zE~^LeaPpR>51tDo{P<{;-OD4Dh^79nc;LwxUNvW50-y{9_LN%-asqzbF6f!ClKClX ze#n?!xbm3&- zC+hXKyy4zMS-d@vYJKX&>+4AbMi#uYlRTrkodN9233^-yJ32~Q%3|FOsbQD4t4ET0 z6OsdG{>9PPwwORU%1Ya61J5<9L2Ru19rZY>@F_^|9vNaEsW`Y5@Yu{n4h^?Lpm8hbO`G z?*yl3X2LoTGa)bd&!zjY&fmxOq-Pl&1J9+P1D$CQj4gxzT6ocL2KZvLPqNq<@G=~$ zg&#kh@^6o!Tq*&7rhKSkzpg(LqSEn}9i_EW?nxX*mxxgT1%(?e%ZX zkZHSJci}M2{pC48@zH+;{oLpPrxY@HIjDG{ayrYVs;sojTC!%t0r-iLNc*n=&# zJKC}z)i1}*UIhL4r$mdfqN|(PF^VW?IOI-p`F~WpXoc88vCZoKCWI9!ufp9u4 zxQtOaDnWF2Ta0p9dGBe5Tnlw6*i-o7QN zvBF$m=7q1z!UyW91bfSv{usAnA!`+5PNA)8C1cz$mmqosN3wctbx)_r;KTD9t8QCX zF#`u~%6k*sD&U^@FXA? z5|c=34`bmjy%B3LD~zwNsRQ#-CAEQ@gC)i>jKV7xB{nD~*a{nL5+QKQqW7KYu9BdF zfje!fMxTD708v`}?sjyr2A=qiZ#CM=nbJaeH&9M66~Xg~>ejI}b4s^-QhC;1Nnvqt z4=bmX$Y>QON{sR>v7Phw>Jos)){XX6~aH%BAObysjV_#p=C@C0a z$)Kew9)FkNL;gV5fhIJp)!ejuM#79D(Lv#1c&ftu@LJNqQi4kzGI)?rQAXl+>HW|t zV}*k!5{Y&hC5Rqz@r=u3qpMUw?P&Y4(Qquyx3sZ+*k)TUQCS&?1<8+#DK7#7&A}(SJRi)KVmf-ESTISzN;n`EdT?z(j4x+uleS~QdMGjdlu8`yzB5NXND1p+&zfm z@p9>DT0Zl^4K07`MN;Nf;}PZ5%?BEFB*NMXskRJVw>i^qGO}w{inoHV;E>5Eaq}U< zU=3J_UNv`jUU0Z{Wqg7Io<#SGT^sJC04LnN+oj(~oDNpCFvscBk$@^HC!e}ZKb zec091p1VKZUHAL}rCS!l+@0oxrr9Tolfj(WiPo1KgTmd__ zT{J_XcXMD0GK|^;&sfytY@#2VXnktkh~KY!q?o`OGUf1 zB*4s3G*cs4*J$&Sv3Mi(88u8WEp{Ta&D`lkl$;|_RS-+Twe3gPCAhRKYfi%}hm1t= z(oAi4_tB%2{P^ivWoZUjGdB?HbM~*joI<3@nQGzIxEF4q`%$kJr`CD+Z-85a90qlw z&Gfm4(hH+fJhL(h^O3QEab^WfnycKB#IR4 zs@JJvseCtfbH{1J*%IV@l31y#MAJCQSj0(aXK8lcN>;`Mu_Ioq$T|kdfty};V%Cfk zB^K|_UDQ6S8?FpO9T|P}h`m4F#px(}xWTfW66FvNPxh{#bu7R6gR;Eie2;5SWlE9p z;Y+eqZnzcOHL;O=y5)K1HtcJZ2ZdfKM@$8I+uW&^+36G zC1dU3i|j-BpJ8uk7$CP@rfiP=$g1FIvw@)I%5CVj_t&>J&vds~hnE=`pF7XxDRIar zF_?uCmzd&{OP#!k@?9)8E76|0TlH@L*3`@>`}-uE-DsGLPHLu&y+dNr9k_s`H&t>4 zAe6CC&-iIy4@?Wulpm~dXR%OXf9!S2i#3|qIcY><*sQH};H$|DaXS9nb5ltR`r(Lq zdgjGLt3(beOO7M|UM8+_RmCC|a zda@Y{om9^krQY;gwDky=gHr|35cc>wl>5&!7M8~AEKIm58;f7F*JD#fAc(6|Hn3#O zBPgm$zQs?N>@=_$)#iY$RSX-JOSLV<#43DI>E^!(ENii3UAMBqx&1QF&NSeggsU$? zJWZ+8CH{+zZScs#bIYZn%&n%ZVm z7?~T(+y@MPUN%7{^nQjg^DGcI$*_U<-_iupS`{Y;G{bczp_>b5pcT1yUvdn!%#l15 z|J*nggo&}@+CVyxBL@Eq4CH=Khx$D{rKoF5oG!$M54Fypuv{Vv>{rqmq6eujWVY=JK*5jsp$FT9L0pd1W z2ZX3SO2M88>Q39$J>1j>$wCz)u7qV{Wo_5&MxmnT2wC3`qR`6ieeSHl@A;;)xYh8c&i&ku{$vzY!z33H8c_1v~drs}cTt+9s5&67@9$tw&o6YXS zH+qK^vyd;=(V%;#v_oj9FC8>Is^)@1YqU!xc zi|>*me@qTxQr}beXQA4J)TA<8W2b(zQVn>t_~~)g>fPY)!ukg{$KNsnZmu>_F8}7{ zd6o*x4K0_$ef0PA+%L!PS9<=ng)#eG08SIReWbxMd#y>^jgDfe3zPzUPRv!9CEkB? 
z$8xh*Rjm1qo*#E(KAL2#Q2)cUCe4F)#FzPrMqSo@|3Oms_7~j)1=w;WI*p}bMR;>- z$O5-yxeAj;>AD?T%o6k(#9r-41q+bH5i4q#@9G z$?;v)y88{HVR12ycZ?A!b3L+xi#tLV`BUL0XV7jT09QL0!*zootxkcg%B{f^3W2$l zcs}Rs%Q_~6j#bZ|q$Y;Eh5O9*Btw{qVJl|2db!Y%+!#Y<*Z;WpaNGZIWiO_lM%xmv<@J;O8vsl|i>Z%%i zeEeoc(g8@o0Rz+zQ*y97A8)^MS&VG#w+O#5T(=f@@>bUn^(-X!( zrsb+{hDB~8_lT6=slebh>RW&$GxpMs4DaHVz4FaIP3I$eHwhklOd&( zAwF;)S;2k84|0qPdsWs=!{Z)Q#y)l18ZhAJ-MGk%t!#k^O-k5lmUYGJXE*H`4E(+u z-m`TF$Z4EV>9o=8DM}f*iCxv;%YGcY$FU#{^ zwkk}`s(}Egf4si{gzB7g|Rsq7D6=5S53v;ZI3``5)gpf z3iep?4fKSZ>`R!Ujj~a&1=|6a3)D&ONSkK0jyKarlkKFEemjZFcS$P59PPUfRQHZr z?ck2lcwZBxTg7gN+d_+x#L)5a{R!`;M3+|Iu7?b=I~|tE^BG-x%BSe&6H{&(shw17j$*`mYi4??XuQf zh8#FIEXQZdj~LzM8^K`X(>P#G920UbnN5m_d-}S}Tk|*bCxZcMex5xp1H{xMJ#^TK zMX3wMSn3fXX7X*LGlPcV)vGAy<;;Uc3%B(x-}O3N!BagbGMAN_56c9qT3tEz>4VY6 zC)03d_n52q*5+7TSI=9F$_nf}6&mU-B6T}hkH*bu>eyL!J1j?T*_t^Oc5I?^nv=VLomnZJI%>3_lHBHzlV7#Ag!-Kf4%ko%y+gzrgb zIEB@Bdlv9O18*+BL*TrrbqBxwd#T(lmg>E{vPKe<`}aOHb^-4dS#ucN%C+8lm!EPK z_@lzI20~55rnQ@i%!PT{0-M5l8hWr{vt|x=gXI{@8_-AxT?6DvGyi0FpZxj9{zEeS zn~2X-ANd@guXuQML{PE0v{7hk@357Y^S{vp4m3l@wpSS*UG*XytDgGUruup2l1JZ) zm&)dgp4D1%URfUZVYztFH+O8;`OB^1jjhwiUTnqajtpaTUZxfPGIWSX3Rk`CM`6e# zy(giLEgG>K`ji#B+Lv^ZWE$?g^r8sd%yR3>zT3dFP#hIhzN;|#P5lV7BEwM!QstCs z>BDv} zJHg}{HJ&WEZ}gtgi&7K&7Ig}uv9QVN9WiTd`GeSY`Z`Zs|E&lExMSRNl~VqVP7%25 z-301b6tE9pcM%6Fs9Lb|Iw~P_-#38@aTFU!rs&A@$%+)PX-Q5{vj8W295?4qygB_C zu)K(MW%PrApkXh;3Z6FsvTMCW^}8cLY%im$3g)yEtPyt4UXIw#jP(+I-k~!}XiYcW z1jb}BK}R`nh$<*VsEpA7$G&cf*GXui|4+n~J{5WQ==P^x@w0XH`aI$*BSJ z@DP&QQ4(Qd9^(`7Cg z>y<5Ph)M$+rD$+`KA9sPkHNZ?wdnU~V{r*ii2o zl_40)aT>=Isco}NoOi0Li~ zLE^eRMweXasVWxnyk?LS01e=TlO!CbYL?kLE~}y)EWY0zw|E`31=~*Qyj^lafb(t6 z5u2W}t3sg_;o7SNyu7oNm%PQWN<6Hnt;+0XvK^7+> z_wo-r^+r;-S7btF9(LXRPP~LWRhtJ@(8I3K0Y86RUAeZzAR8AS3W*)llCd}NwphpT z0)HKb)x}IH;a*a30k1|WkzbZ73%&H5cD81soW)K4uSMy}q@Ec}Ih$5mb_3691d7la zcT3V zrdE2Xe`eb%Ft?{LDQ3i@OM|&{t?v=o@f7AkZ{K(VA8^yBpyM^J0M{dD#z48E%B=S5 zgISk(%!j;b;l!qj7!*YltKCBXqpZa3BHjHt)q78jH)gs}2N$K4$v)ffJ`025^=?ZbJxVPkFEUC_`{d%mxWC(;-XqljQSTtCAnaaO zjzF$wEkSfWDPuYbJXohW^_v)@%pd=L4t zixvEH&Zh#-ae!EeUl*uUplo_&d{JhUZhx$Y@VDga&x*{ZEUnQ-K~lr2i<+E?Z=IMk zs}FX26=k>+cl-7HsqNG*NxV%F@svK%z&)Rl|7rqm)p+$}QSn`DWn5&0`1ufZO%jq5 z^wq%EcQAHvF`Sq7*4y_{u;7As&RmnO%Bugk)Cl@GPySsL)^jpBvo6|F63eOv8;-Nh06FfwlzoOG!pVze102^AC|L819>V!`E+Z5r0t;rfv^criGAKvIzQ`GutN7+0 zSP^QP=02)+$R>YQJWt-!HAfXxCS^Q&gFja5eHh!aGOwk`DhaE8D{%+Wv11xvM(jt) zEA5AJ&x$7hh&pTcTbR~lLIAkO4nW~K_t?ukc}97b^V2jV<_!#H61QjXvgH=f3&p~P z=7=wi%HEVmh+i|h$T0Tce5$pW3+5bS#wF8YYf)~EDEICl?mnupwg&<-Y}-kC$u!lJVBkMZrR`t$FCn8^l-)=c*a3q z)+BLp&Z%2R3HUKSb858@Y*&p~ek@YB8lHZ^{SAv6sN=HH&|^~%znwKE9grb5zxW8p zE#I{*24M9sx^m5M8A2cC;YT%D)t#c1`gr?Mxyfj+iI4FrBeGrAWN1=c*r?lf(S&Hk z*nLVoBs!~ia8Nv+Dri0-&aoonmiG@waub>$JJ;#q;Lnxvf$ zW98fV7T8sq1RI+VHo082JC|OTKCV7Y)GLz!bTd|o&&9)$ESNM8@Ls~71qbJUXIeZZNW~%SjV)3Zc5Lp&VaIv80jH&7}jhXBm zL8c$>9C6%rnB8s!xn2MD73Dh(yCcBW%EO*G>ozn8x<9w(O}*QUMd|W&fIQr5*t~~N zb1k!&Ay&j}tNLIw8?_b3CLz37It_oh|6b%GlrucjfO=yfxEhV7!;5L$>OvsjJ2RBAiMan_2ptxnR!+fTurWU^fVHVXK!33@>HL zX)%Cu+RUOs2I6kwL*dlckvpvnyw5x*nv_q-gy~1+OFts0!D*1xoV{0~JGJ;0WX+u# zo;c&Sk-Lv%(ZIY{AT3a&9YcY~nNt)N=`zs*l}w(h|YXzKG}_&2)>#xDT6lnZhK(T)a?@r?lxD25eS7K!`sDEN8$r)$r%kUdhwP1(M2**HJV zx2RM2yy#FLh%B@1KMaOQ7=F&0l}20_X;ODyER2^q4-(Y?duHME*s`s7fsUP0$Dm-T z+~zy6c5#}iuAts>aZk%XUfXVfns{WqeE`P;1Aw1+Gb1YL$2Sk9IR>77s*-qEYH}t1 zf$v&?nXu3M)H<&%c~b40!H7oHA;lq^TN=`xy95o?vE$QM4>f&~rK5Oy*ClTr^}~pZ z+?|`hCqZhGmXWp9RsXmzDb{w5Cndq}PMVV!0iDr})dJR}xvqFBf12{XIyznHvQ#X& ztB=dw$Tk_e-`<-`q!xVUZ_5ELL;J!BfYG?v%b)f|bny17&h_u;r@M+N$kL!h<|s>JW0F}6E6`z=2ml{I+|KJs 
zo3XG($=wiR>`XOFzAVs0;cPnBk{CX(wrJivFCpA4+sq8}8!WOrfs*!&nazc#DDa=IY zo@f3-D`O4rJ-eon%YzRh=z>4NJ0X!$`zHcgfR{PmPfP6Y>*r zq8#G?flPu`#PB)*pep`p9f(W*K-$E}tnP1UX}Rrh{sk-rX6z=mvM$^rjB9r$9>7q8 zCfVOx!)(4oP2XF?@M&cLrW#>URr(vlBmk=IAuHO9vQ*HDk zC*>!Cs@H#^9AIUD@}vlJ&`tRhlZQ|(K>71#>c3D9QYOCu98!yl+}|WYm^dJ*OY}!8 z+CP{N;8W4&H@+N$pMUF?jNtypOliqV?fJi8)gXBwp(f=3fa7nhnwZmEYeOs29_9EK zk_wSJJ^?uM+0|8lkObQr=(3JI)I4fq`v(>d9;Bcq?bEd2|NVZsWm-^H$o@-N0m6ZT zOw|mcY=7gp!s|k*7u=^j|DtM~qb_$PI5I$f`&-pw309TP2s2Kq|C<&~i%J4Oi-yUq z-?Wqf_%v>MJRwl#sQK{rKQuxyNeZCaCRwGRA^0cCt_98w?6L;wnA$&3_HWN1hjs$$ zCVRm|=bFPRdQ*556m{~2q(C*~m}4_(kGm@remi!tx^9nw{;y-iPvHRbraMLg;ke6B z-_~C{!sR1Q?+mOjJQ&9hq_pY0cO(G~F;vNqdcj(pUVj7_d~6X4IH+RhzO1*s+FNk6 zyE)P1?+^C@welbRwVz4te*mdEHW!>BD-^R|u|4i1pZC~+e-8 z!fys0`J|<5EsXKgPyG3}ezd5rHU7||03z}_6jTy26kG*oynbPt7~l=N!TRthq13;|iphKTG?os|^K1Fj1HM}JBeq@E zxd5*^>Zmj%cbxcPqnmyF9$P?#oPubij`J-*GKe|;Lh{%f(zxj-pa8G|5N*(y{5O&z ze+l#%Y;JyMTF`2beN@O|`rH9G;C#N`#tEuffb40g=oDl7&6oFk1&oS?=S?Lx^io$E zSvVOr=>uPq$$u|f)4Y`0i5XN7NdNn?@nwa5$+)}o?eB;_d4E&|{Gl!QG*Vu09}us$ z{C9~}fE@Zy4YMD%65G%+t=gITKeVzXh#u%jz1QWxunejkb*8lcX`u9vmgEYnA0@zVqD?uR$Iq;R0Qoe|>BnY(d7@!Fw#I<@HeI5B zVLqrM0Omg+HTwpb@8pdhbrUo;*0}xd`%MRi0w?GsR2l_LC`#aUTY`AtnaCbM+v1|4 zyT8D$;U_Dkx&gUGw9fr6rsXECYt6L4GzUe1$7$0v-{4X%{af0e6i#$b%vW%MUE849 zU22+`q`H!=Ia*;@Rl3az!T-*f0(@%t)~MUz9LS`m3kyMsENqUoiELu$+az!#6qdfp zQFh_Q9gpJep6^B44I0r}MBjiKt3Fw&>Nq|F)I1)Ap>eWS0cbt@V)}s<0+eH}y7*_v z-UPZmW)K23!$W+&?)^ZV4t=cxl)L~p>_%*n*W_7;O!JjNQS3|yN=64%d*`71Q_JU| zx2^(69jp2QGpGjEZ0$@(kFJ0D0$R9;wI&oKZxN8K*W=4>Y+W@iPzY={N(@#x*KP!a zzp_PN3wTIEraDss^?QNSwd2m|AQ>+TY(E0pJ~;*QCSTNLAErF7f2#?FOpSE_NXn@g+VHp{6wgAx$!+_yH%4AT!!r8eu8c^0g! zg56>cV7Z!2bFe3CszGhvAV<6Obww2KKTU&sTnUQV62ZUU7_i@BW#++$ z)u-mQ83{EEuXmuDuXx`(%FlmTIX(ad11ks0UBJrG(PpiVyrl{f!Eq$Fz8xqBfOGSe zu=0gG$&n7_h&%#ux+bvsC|q=Csba~@ZHHArf|0VIF`%-@7Epx+kZ|eFav6@o8U%Lj zF3ktdr5|O%iW%Z9nk$J!0dk-uCT@uX*e$zRzFf7w@QO;wI*1zEcCiq7=9$AnZj%Z? 
znTNg}f*p`jOnEFQ6jxyT)9E|+;syu*)23g>8SN|dV((L{%Rec^EpF_>S@(!Gc7xma z{N93rU^+&x`WkjG*)wTj?84DrzfkRUMh$1Yf0}S%`8=xFs>1`!*Oy(LpekOTy$fQ0 z2TXmgI2{gAU^WHAEi*H;1tF(+#?-6sQjKysbX9&k&zB*J=^=~iOaZWt-XZhyOs5Wh zv{JOwTxyKLyAj!bsvrT-&lioO^HII!b8P4?$QR`+LA~!))?T=0^HLx<{n?fHROMPv z!i%U#_ijndODu}BEh-%WErUyer?r!_R3{oTGYJ3$&m(5#7)l`$q>6u0023Rki2lQ?G#ixpb`d3yAd2Fmj9|bsBkImr2x^nrKrBS9OEH_`g zhr2b00Dm#&atn0PcAf<_fs2Ez;1a6!EqqWtV5s9=D!FNx%+7GL33OVLyR_db9#(*X z8rEx`;iW+L!Dm^SiA7Shw6TZ>P9SW5J@2wqI{k{`fn79{;Y@6nYDat_yGZVnUb!m{c9iVsbx8DQg9*-3Yqo&INuw>wB_4Hm{qSjWy=Wi2E z+_R^X7pnM?R}CUriL)Vy7iYzrHWsQOKYI9PQ`;o7Fb&j4a?y^$L+pNoGVMtnL!dGu`a40hI8n54PEYU zoQf6>BnSs#v@(&zRCB{$zE~mT#T*aXV%*~!7t6ihfI`*!*MpfIgNGEyxUK}1q2=x# zpOA~OE&{>apSy}~23hF!|B?3P;ZVQb|FlV)A}K(>Y@@h$woqjtuj-nl=pq>wxxI)lLVm5wWL?|n(Uzk zpfnYOnXeM}$OoUgQEWZRYVCSfNisJwU}$Px4<$75^wa< ze4eY|8Q7LH@fj-wa3EqHl)}j&?A=EZc|=ZeKgPHVQIJdmMD}AP@}~e{XZ~22$J)x^ zF@J_h!FweVen>1aAFd!d8=P@}r5#A;Qu=G|QV$ZoH~PtnPFJ}AE`D^0aIz|Wa4@r=B7 zyq3ODj^^4FeK=iqEkUz49Nylt+nsW#H&kTt{0J5E1E*xkmKy@39-`!#h8IkG6c7fK z@-6g(AV27fVNH#?9tnpXo6)WUZu#j-7lMgMQa>sLT1_Bf#EJK8JtwTAxd&uhM3d^T z(MA^-G$8iQ(&Rq;>Q$K+ zM~9NI?^FCjhZMK9po)XYAuEn0o@ifnkKRF?d;Dz!XgENKmaCqQdO8cI%KG5`vmSuw_}QfGsk)C(1bYb zl)mm<%F|QF*1LGpU22m##3HR4a|;Shkgsc#*Tgz`u*v2lT63}fo;Hf%GMhVpyZ=-+ z2!!onAMS3~!JPRFK)o_<&88wRC~RbE+0lzszM|xqD?l0;A-NOG)SgHhUZU4Eq%C-v z%@yTRFKE&ndU9Q>8v{hNI{)cdYjTtyC9#SLlU-OOOjG-EZ6yQ^%5wtVsu(_+%a-l| zU?J5&bW{+UScPR&T|82*cX#NKD6anU`SrJg2fdQ=U9)B^~G(l?Q zwzC-QRJ7>}^gZ^EeKWh$0C){%O>_z?Rie(?KkU)^MFK*&b4cVuQb`Cq?ROMk`z2hg z!}dL@K@wM5_TB4P2T?YC3aaP(46Y>a;~^e-gMo@MzM*K}V*7H~QeJl0ynN2U1pm`< zd*l%OJdc@TBcs(w&hc8BIVF*_uA9YZmsADGKq(5b-UNrcEyr=mY96iaAs~hJIPAS` zgLjvV%xf(SC0Sl5^vX6+{k}KCTLW(LDOEe9CQGW5x9u6`f+6b#VJ$HcVY6;tHY)@c zww9RWu~>P~rZ6PYJT!}Uc*6+9Z6Ogx*nu>U3Q*R4gq(M-=O}&0hqIP`9U8=b&ixE9 zC<3$##Ok9v1Sd{(G`|5ptB-J?nklNdtxvY1MP%e|;f3oYBxe2)={xs|vNcx#N^%%- z(W?b_L2_^dHVCWz9A&EUgxS)~2v8E&Nb%}UY$kTCn)NAj!)p#Dc#)`-aCpRhuovAN z*l}&fE}$jM(@tFcc!p*|cgp1xeJp_V+vpk6>p;T9mV>_dkI!m7byHW=cg7j_9T#QS zK0-1jDe+gu_TBm@9S_#@f495son=Q4_X4tPiiw@vd1twq42?tJ>?`g%%okmY^{WDv zCZdd~l^PH2nkks&_2xmnVHd@oB$UC!>y*F?u9i_ycC)V1U3#9*wj$Ma9+ZZWEiic5 zs?)W#vF0ZF)mTqhJLV{ssi>!Q0%mEYaY#>Bi(Vza z;HZmEv>>)3t=y%1#|ZMVpD1j&7LkJ>W~_c`fQK4BU6yg^@uOPlC}^miYvZyF#ialP zrccA4`q$H}sIh^LK+*4-grVj>urv)Ts5B1b*Gt8j(6@TC57Y1zj6agXj6<#xUZ+A~ z+SQ9mF`#tkmK&rA6ZOdPC7Y$0L^DysCsleYEke1#2JagUvCd9S0}n52t-+jPQu;dK z&9!A%NAaVv!_E_B{3L9QuPN4)PahL5sEzCrLcdBo^E=+4r+7KW0A@)Hu7Pl;vcAa4 zpFS#Z-;eNMh&j~;&lH-#jWh^L*8E0Hu~sivvh(-$sp8q{!W7mm7WL+qR6W$7oFZ2M zD&2eY-L{}!ShicY++II_bU4)Gy_zoXZlUIiyo9K=L0Sf5vIzKsKebgMrw8;Dj<;JJ zH~p2Gi~M1vz2t^+b7#gzpXQ+rvCfYSVylu#$e} zIV^8NdpJ=>hqD9f7+8JXqK+Mdp}b|ev;@{wiJB}!md5EdD*>w5}J&l9|b;G>1z}5`$Av z3sHAeS`(|JdqJTbBUA%a&&&sl9oLRfwXM(=1xFR0>rv2pfc4B?L>0M80z@*ClX(;{ z!E_O`EFTrF&eFs;$V<*v28P+GNY-{ncwZm|ZRv&zBOaGIzx+u!{eow1venfB|0!)nrW_=1Iv&sO3`V zmiJAH@S`kD_h*41%b7~+K_t|<8%^2D@$!2DbBFsZa^(r7kbQz~)9u=%e7IEOhtja5 z(n=7{MyiOD&&$J@LQri_4Mc@0LPE;;13bc;>P@i*IZ&A8q!Jg1#y}pd-@soGWWJ%& z=xnZ}z*I@&1?pG~f-Dcpqmc49= zmmboc(d({yJ-;Ilc|KAq@4Y2s8R;-Tp);S~@6+uC<+(^15E6@MiKu6v0VPo?(6o&8 z+-ZhBagZBWrO(PjtHo@IiwzmKFsph=)HZwps#pXl-7H@Lc|vSLM)z0HVn5cJU^K zlrg6gpo-!C>xihoej+F2xJTW`8_mXWot3C#^FhZY8=@#FJ`Icl;5B)KlkRoq9b(}M zDSxcr>;7svjtTb0d}`_lhIcg;G&=xRf(za<632c;l9uvD_XKlV3mb-gbH6 zZlvnFHtNivJB`Bm@G3>@vlRfWoERV`zGU7y;8!zVFM}5x;;3#Zj7<^C(tV9mKA%&V z9beH|z+w?L_h^H6G*gRP?IdX_{+N|)>n=Da#>(+@@pCn>nG4AOwbavxP>_tTXwa#u6u@TYp7gW4TKH!r?H&Z zG8rVTsH_;v&P?zNjNitjhaN4ry|?2tN3fQt^#1>Svq zdywM94iWK3U}6?yM;D6osGf>VgNk;>0bZQEXSM&$Yn4+&I=FV(3J+StH~LAZBmAvj 
zLlGI#p&p=Q-oqtz)bd(?#(S-7&|&rVUV>V`dPlNfsQJHK1xUs9TuOPs)7ZcKNDKpc z$wI5BSorEju0bWq84izwl0B%#H+{s$^8^Yx2-jAwRB07`xB4@4+SylC!cxxG zo`!8(Uq?IE1(ST+2S1z*`CcaX9sN#YeCfn_MWy2b$&Br=9)D0hPU=nY1M|3tF-Fzr zDj?h88-OM|hNUSB>)q`<)GeGmZV&ChiET>adXRb_Q;HbUCZ-bJUtFL9WrO*KYEhpZ zmITVmc%iN0_<0SnFTMw#ScB<8=;NPKy^`vUeUuc^9J+JP_p9CB`#|&FiiJE=Ixe9? z7s~2PqDm(XJ(GkGX8f3SD;S?!=PH5CYfBG5ZN-!8>Fgb!ps=b7{;?~cENIxzE@}U` zeGs^u)4ie^i+av&pnf;YmD9HOnZM(l-c%lceDdQAYAkPj6Icsr-H`dp{pt^G2bCM#AHk{-d+NCMeQAH3*y4sGj#-Q z2r|ScOZDlc-H4ok`NC?8i?zJULQ^0~H}t$|v}XpYNW~tl-BPDy+jW=6+JMdgXD>fm zL9RHO4WaSpSD!Fg##4NPYWYhn=FwPq9ELDH0gt;pFYz zxmpGlNp?Xed<8T7nZ8w(M(4kUqid~Ig?A_8`VW#+(Ge>0lT{157=0)#JZjZw?W4Y^ zHHtEDaL8yCOM^kwgl943T9^aqP}O&L-*_dDGHG{NoI^b>6i~g$Y!fA_5U&8S01r}+ z=q0>eKMjtaxz|MvF9%84#N>wd9X*4iEEboneX4hhdojI3J&c!`Fzh0t6fks5UQ(!f z$Xb3aM+{&Y*|KP7y9La(@<))%0TD0c6|zhTX3Ot?5zs^mad}Jwd#)p$5=WON5IuW@rG7et{^mty83oVfasw)2Y zlQRrr1eUp86!5ld;xG!fWbWcCU_LT+Hkz@%N7dslT)zZL`#yx-dgUikmqQBRPs_YEz{uTR{R2fN zKK!zV7YxekFk>TfiZP%w?mMU0g_y27mpXft*ls;Ut*z+ z0pEmm;Q_#-bT9^pSJRH_q>o8=|awz!Akm>;xlx26(cD}DPS{)vi$CEWIa&_!fN z`^@hq$q<>>IwCu~&%8cosYaC@Rr&>X~TH>R8+cY@Nu1T;r|hdLhmUobmbD7&Xa2fkYxT4Nuu&Un5& zO5sPa0ya-F6heCP4|UJ`eVA+}EK2h_wJgR@g-HvX=m5f+(IoOC1ihcR^AEQ7vO7<^po#O!WXq^{l6?4i zY4L-NucUvj{chg)EX+@{l~{mCVsVlP{d*JJTx&nXvKru~pY~L%>~Rc{l=Vs(l+-L-rD=`bZPsi4l#S1bTk1(n(RT>eY{T2K&8qMFFpdppvCsehcMqR1 zv~i2_ix>9#KAOmM=t7Sl&hc+_tb1nC_ayjoP*1yWThm3BP)Nl0JFS==X%c_tDot>N zs)o4F$Ua^E-wEs^vn*)k7m)Bk{SOc4WX3*^P_^?EZTTM_?r!Cz20sf)_;UAuII7md zB1IsuwNdH+;bD&aQqL$qm_4>+pX-mXG*_hhT~X5`5Ap+Pvnh)ln$5g_u-znE-XF8n0hU?uN6c%Uq1LN zobn+wcRYQ|vFO@wLiGeXwzGBIZcj74!_9P@Knf>(+aKvQ!#s4MsPa|g`b^jrFF zs7jRAsZkz_{!jc4z2hj$`rVJK(8C_{Nb7qN*M*JUaQOPYVRSY+S8LIosG%`Fzw7}U zWl^PR&!mdsg(CeCYJFEnuQ(zZM(?;1p1d%E7=0ajzVa_4iM4^JVjd+HY$*n!!Pl!E zIY;wpo)X7QxxzQr=gig|5JnnoJghvqO5s$`5_Ddu=!GSk5?N(anO4JO=`TJH5u@OixT+PVNK z^G&Fh-_)a$I<}x46iAJ(*ZiwRR%0?!ke4J6X^4hSMqCxs1x5114tpmb>jz%3wm#=N zFBK}316wYmIkKk}GrZW}#-^b{oW^o-;lzO6=QDS2%>F~qaRLVY(OS4v%>GEm&Wiwr zUN#!@&?liqxqBGHN{;&a z6GpN;OFjTP|7fi_&`*xz4Zi_XncFjVbe=e@ptbt4ZT(z%!y87gm>OH992(oAMdERmV2-rJGbZ%yY~de3R17gy-G3zG#GH&bfedIJN<78dZy zbsRX+Y)?>S4Yy8baGKmjB`np7Tljp~(1@#anH&ilW3bT>S@*9lB%gM0S-fLbwI)Xw8Z7Ojb+(x6QQ`X)u%xkH`nEhy4JA@vuNti^~i07m_1;OWO3bjXItZ3d#M z<;r2?VF1fC*D`Jzif^cO;w7y@^Dn0LJb^YRJ47MtRVi2t(o(#5<8rDVc`u`fK6$~; zuphjRHV)>-ZoL-Cv`K*0S~_>*snQp)kIN<&(PaG0$YGaSIVZ*F5kSl`d|t>%Hq{zc zpR3Kt=!KYvo{V@PscnhLo01-hNX8!KYff~O_R;-og^DCa@4Em`*Yd{vZaZEKT2W~k zba8(oW(~yVst1!1tT;*r(7DiF*nXg|?*!;Uqm;KzV3F?USGsJnK_jnMiPA*nO4cS1 zV^@|ux6C@zA+@2r%~3cMuEnlVsh_bZzLru2i(*O`LfBRh)ma5|Nx&*oBA*LGax|JB z3cR48RF9O1yC+82(MU5LaEye%@I-Yrmfhjg|EyxZo7>Ot?7122TH{zCD`FD=kC6QC z0f1`Kx_-@76A%v3PXaT7pDaMAg0q=Z8N7GvEJuBa$aky&*WfOsQtd-z53jM_D88~mKsymh3oudE<9QW4Lfnf-n%wXv_boDd?<%D zEz9pkIjhq3$5%Hwv{wByg~!wK86+!RHX$u(Byb?aXhl9!Un<{1X~u6GU{a+Gwg z`#V?J7tHGb*wO%j9#jyJ=~3hgetS-MZC$9VOMaWF_F_=lauFD}a43WxQ}x-QY)TlF z@Nv7KZr$oJr{&><45lcUrX`+Qz!1A7*j_)LXvEmIb#|<7i{Hemj-0!KZmEvHC@(v@ zj66b>gyi)1sQlXCj|8%Zjk4#&IbCS|-LrFwZZl@YfQu*%oN8Lj;u7#jCsB^jdu@@E z!bhL$Z;TGES|0&K74Kk7eq0cGaVIPIsiTwYp+kUEc`M-#4FE z6v$Q~+Hf;qqk4ls4uy)~X5MB6ekf3i^yJXBq0$NkE%8~pwij&-A7oBFoq9EwYK;=V z7FMwogN8ELAP3ExH4tLw23&_4>(AXGICW_9*B%dANokf;ahRgQ&?7S}#0>ub=^ck0* zl|!*FmGBm{uPP99Aw2d$WA;)OGfDGStvU&yTy5)jkB4m|&wPWVHO`Wj>mHT@$8L`} z_~Eg)2nvn7U-5ZiT;avHIt#7~yvCEp2R)GC_6U*# zbs4t0O~+@sr}<^tNL{zPTuuL$oZ}(uYq1q@7V7>T`E@sY;yx)}A3mq^);dzBy?Q>M zv<}!B+BDW`O7A`vxqm8kIZ%aE+n^y^^JdqKHgeRdVbvNij%X0?AhhckmUE>L) zHf1t43I`|?Xi*ESVLjW>=ku!NAqX4D>6yPrA0N0w{-LT|&uMgq-+$i*2sOL|=IL$v 
zFDmxQ?ti2A>m8}(sEZ{~jQ~oBaJHJ-IJ>1GVhQo00`SDj!N3-F~{Dv-#H~dv&i|B)aeY@5wfH=R?8wY}pKkc6@d0_3=40YMyeVTpkN&Z4d(4 zZ_fL)ZShG&`oZQi1D-5cH50QfF$vjPeyb8l4(?s5Z@HG1!N2C^z(#U|9FHY zb$BJF7+NvAZ4h>YIbt28NndK_1t-8T2-R|N?8nuHXgtyg^lvR<$ncq>#WMcy)vx=u~2Kr=73K2@N{TA z4K@aUq@6nlwdQ0)^j#It-j`y>GB#LF6|v7|Qj=m{Ifi1RIt&?&fY6<1aWscrIEQhI zT6QOkEO8sKBrPm4ockRwk^#rd3HmlEUvd=Iyz~9M0E7Xkee;Lf${JG3rAuF%8E+X? z)9J9vaMnuSP#Y#<06PVXk=y+&seVt`Gsd{O5 zBF_R3mIG$ugjBuZZzZ1yJ-&XQ%zx8Gpj7#Vp2MW?VPyG`cd!wR=qJj=P}&V!uVbT! z0&JVa-n5*fa}W0COYqxXL>Q6Nw7AmAuou``g6^@yOAoj$+WZS3Z~z6{!H({gV#ZO0=wK!c_Ez;n8)m-BJ@# zw0@Y<(KeUn%p(m;Y(lB+m8}Qg1DR{*D96|==J`uTCbRpVI-PXe3DK4T&g;@GWU-+3_pis`2(6c)y(`{q=;Do-^V3F!+w zAQP;~&}p;V2fDaYG`^!NY)>GuzqS+K7%CzdbeUJEkT?s^^iC3!o|kbK)+K(mWqa)a z5p7-J_ytxcT`W4u7e9&LHD4@b@c5+GU}`OyM`iKOo^#c51AJqK>E8e+CkMp|&2efS z&HR2g9UAw0yrz2z9~B7Ad*BjOGk2%dwMVdHF4Y{Pcl*5^&+Id-PHnk%mIn82Q z=aImpPoh5a22ZS7v@X1?D?RCPTHgQS0BAR!w>nb(N=PxZCE6POY-rFs=7dmBq8rwc zB~rCnbk^d(d{T684Wq=88Y;dRZYu;P1m?wAo@>T*#h@>HS<lYld+`nF61y3^IDgvsfNJuUr%(ApE!ew=B1J*L315Z_NO%nMF zGcBH?rq2%?6btpxT)pkFmHLu(P}h3xG{q#uKb14c?^|_lYE2Z<^#y4e4C+@0q>Y`s69q)wDY zm;B8w2WYjb2)ZNUvU&V!yZFOfM}`TQ(9Mhk!)GDTF=)i5l6w6NH~n*Z8B# zrf|Mk4ZJCwX=W50mmY1r@|yxKiEU&81q`5fRsm|{`{suP|ELn>H`-Z0%tR5@I8-yJ zCH9vx>OC600Nz5z=$@l{isbEp-**ZANtti!{t*4!b3C#fO_Qy^?yUH?MJ+|3m{oft z!{FL}VEM7?KPlPJkL?mKh7LQ6;jb&a`gPTiFU}A4_Kd52D=EqOgdeMpGqd+(fL79X z$Cf{!kZgH*5U^zy$N!_kJX`W;1LCaQFht zU~PRm7%o^{!noTeJHoH|&dz72OjBYPYmKRPw%Qu=*?obgVBg!_V046%l1xzl`P$LO znHkir#1Ty<*_b^e^5R=JPfY8QOI-crQl0x_^auI4#pZsAy;I1V82H{yu&qSK&V;MP z%#iWI(eFC)FIS@Xs14c<*~VnyyquWLxKRFva(4?MK8$VFKQ-D>pwU`s=bZ}H#Vxeu z)_;!Vh5;?z{AQvC(Hgu-)%{_ZAR&r8+=lr1%E&i_P_p@{n1koT2)5OEtvk#VHDmh3 z%}&*CYUY7oNB*j8o20|rV7ejo=}lM_!aBH2s<|^1*ht`NmU@0!L#splN|FnY7DC#B zRYYyaHIWte7JJ{wMyyv#KuhJ=w-Qvm-!^K`v(g=-YcO0h{jlQ%;ja)#j@NpXv$ce_ z&u%WVs@!BHcyC*Mm2qsZ0xv}J;{LB2+sxDu_4CGBnZ~U0tj1IWkgPW-c54n9y-IZBt|9t47~P38GYo~ z&SqX){SHocxT8PlkCzn^@-_>?rX;MiH~-v&@#2ur4grz0Gv5mL*4-SmQj)y8w?$C# z^UnXk)N2w}2iNsK?6bWd>qojooh{bJdC>RGv0vBx71Jw+-^D++yXx9RKn>f%^}*em zNQLC&88`>`btLa*1U%aa=&xT9ii8t3B)qQe*zmEYUctHYv7~CQ?AR1^K}W${VN^V; z`1)hpL7g){6z`v*M(&w`SAIIQk*W$FtJcXh>t`ZWNgieXW=PK)#jySGu({!u(^K5_ zE7|rweE3B#I--)^ATQ%(NsU>brKrC=f0ZS*rmoWDcRRK(nxwt$W|ea97;v-%mA$kr zWPNqf)}W7f19XCQZ+me1$!fcWvezzE!bGT0vHP?JLsz2fI(uq}e*g9~O^0>zUZP2Y ztk_!7bgJyg)lwfcoxy2_Bewm~nJlThUqu*?#-_HfI&^5Wr1BG1&}|+aUY0YhW8M>9 zw(e_nu65V*S0yAiOLbX<`4L`QCGpLoy3^~K?LAI~8GBwmdGG|Wu;)%SYuhSb@1!I}l3GQ-4qR=lD?@&s9i!k^KTTqmU6dBCdL*A6T#)gpn56q!qmFT2)|-vycCQl24qm$xG*Z4>tFvB< zNM^JR>E&03t-nd~CQJY0O>2=qT;9ucFLd{jGhIv{(q#;1HYh-%)IFobTj)_>Z96jW z`E*Ws*CUv-uv>IuGoacf6a2P7%?=pJTUYs^PnXYUxzcfDMZ4`5Idl9swQ4%Oy{fh_ zAGdH_@97K2JCMRV2CfcIeb>WSaD^^qdH&A)puu^XMWD+2{JpiY>fy;FU&pMG?C`w- z?KYfiRj3FGIi z-9|J6;3|FUGHr;_HE4TyuS@7j(zjPGpCBqng|y~u%TO_2^^B@&Z&g9BRzh;%yJ$?j z?$`>;SG_L>m)0+MXqG5T&Nf1EQ}O)+!lv%kJ6D&yoJ+vOX+Gr0oaWbr?u8Y{Q8x#{ zfEV})8Ih#{n_3jAaj(Ap6#8KY+1@tysEGR!hxV2ZuVGU!rQIg@Qdz*B(fFW>N3ZD? 
zr>#qRO9!93B{Cs>!UoB)UEY*1;)H2|2{#90Mz@pP8uOGNL7g54jMRz052HD$*yCOg zg{R{dh`y)i`nuPzWQK!!U|Ugg_RiNZFc{?li;!}uMH-v(*$kElX)SrnN-lfCqSh$S zR%PM0u7vT04Ro#0L{xT_OC^L&|3F^ASIh2c9!B?cYve_+-N3>{HFPz7v5$2c#O@I7 zrL`lk;vaht2E!?bW*Y*=w!5oYB>Gv|tGZ=d*ZlKaUeCJ3Z}kyAz#99ktxZ ziPM_Zp)}E9&^1(v{k!XYPXAp-w0FRo9hkIg&EH?W zxW4F2YLxjSLM6udHZz((tUC$Hs4j8a(rq@BA)fdZv zzAk9@aH45i@-BX~5!%i=w!MuBco^ofMqq{R?&{L`tr`{q(;)L8ss-te z%w9mKd#!uBt2Zv^ws0zW&(<$kXIqH4c(&)pS(fhlpNajM8Rp+Jn&TmlP7kCeEMvs) zyolt1Z@!%yE@o3sR#Z@uoTX6L(x1hx$=IQtdxUPnv0yrLxkIwPVBwHlBH=aJ&T4U-h)lMd--aARlHh-&&(_N>qg}?f9q2tG-b?2v_35$-HqU%Ts z(iHw$vcYAcY+}k|B8B{!eMNubE$M6tGtiiK!;(eKzlKcQJmP1*RvuSkejAev!E3R#R+wS4^h;4~N1&<86e9oFuhU!|? z6L}=f(3Hb?NhmqGmu#U&qW^u8cXD;bZR9;Gu;i$jVND4YCCR|;jH#YT)gCiL=$1-975&s&kK+BXA7*GF^TbUX3HCv|G2u- zH_4er^gK*nS(Z!0y76^fW$;{@jJ_@8LMg9RJN#fztntJfugJJqLGsL2F}}elhfex+ zmxU{}LfD{-ZM<{h;|J~P%{TX|V4n*)s_rWzvK81Disd$Bt&?by+=eI3!Ib~9wE4% znvz6Q^0$!D-}Yy|$q{X7ma0;Qi;oaV1tIb3E58WB#t5g4tgGC5xArT%EuA^&#vaWWLbqp4)1st1uRVmDvR z=KU%KbE+9`X*grgC~WviOa;3%GV8nut61|-rs81R*NPi=^8XVJCHsPq;rkIWscv0~;9*}uJcMQTtxHA{?# z@#dJ|Q;I)3c(O)w#nCyxPh6#i*|n`|I86xyk11F^(bz84B0= zF1^QBIfo(jS-Ty}OGCxIPzgpsT-hk$c#FV=@rNnf8I87%>>Og?$aKnf)hRqt%y~N8 zI2$&0B}vk^Rz3Jgdbs006vXfJ6P3}o$8pmi{pi^b^4pWIvDd@NS>vRA*1h{9=rHcWXGheVQVEl zcXy?~ZGA9C9yT1T&l=Q$LP;lx$p$SteStO#2aHawN4hN6>LUe*pj;0#i8m#`<<^Lh zNvu+EnDm=SGc`-f*WA$N{nR9+Z|uhS-zZr0F&kh|d4*0RG$CEJ73E49d9T(K*+&Y` zjXjd~v4OqS#nOElXX@oua+<1BvST2sc>I?x7pK{rx|Rm6FkTY`_?g^H!l8!l%&a#_ z$(flh+oeAxcEPp4=NtWoeQVaN?xoVvwWqz=H^~nheO%i$C0-yRWN~ujCqAfot>^Q& zMBUzUMg6ejeQCywjAXR~Mi+_hFiuKA?2w3a`_=dX)gyJQckqgJ`EpK8;ViN0WA9m) zgI`=3>C@aUU!00>W{P@`F|}N|yHw)ho*{ClG`XF0p)x$P+Sa(*t*)fQ0O^9(`y{U- zIg1r2KyWV>w`Mh&i%%`4JfO59ME`D5g~>Aml$z~$J71JCOy-hR&xXmA5MT%Ag?sYJ z(tth9@?Ka#oe#M4_28Av@gz`yo_UiC;HihQl8>!FWqbL=+1JdsaOK? zF3ho881^MmUo)-s2Ff;u^pZDyS#r8*Y)cIo=7LYf#lrgh>%{NP&3=9A5_f|rrr)CD zf9RG*8k$etYu%(XJC13hW-dc%qA`^EiBCbK0(M*Zk`-R1U%__r77BgGNt<8eC6XsW89 z4JC~2R1b2?(%qKw+TzudMK0PwN2pJ}gqd*atJf@zBFWLH6lg3t$nN~F{{q|o>3Nui zFZa-74*2eYbjfMh@M}U~R_tu!nIzvKDCzmI!bnwwFyX?3+TlaK4Ce~WGlt;YcZGyU zUuRQPEjZwfRxqN3nW91#x%tLi%!5iWULv4NY6z#Nt8{iY^$S?~eZ?MekWD>sAFGzw z-7K;54`%J<$4#j+TjjY_9>2bJ;!8!?!mU93OgKg5%7yz1U9tCaL%7z8TGPnHzAQdS z3ai;b*#dR=H^BytnWmuM&KFa__g)vx_aSQnby;8sKa0K8Sz9=2Wyct0#+oY4)3H&B zx$_mXA-?wZ+n9?->zhQ7vgcT?Kn}iONgwZ63prI85IOfaM<)xtyaox>XUL3cvKtrK zez3vr{W{HVCSljEt$Z%>yqA?R+A>WgmK3-avZ7{zzF{WKPwE}3-Mi}WHPKV(Y|!>X zBJ~@(`U_7X#_^eK0v4X|$b68x$##n~xts)UfJZ z&ZID1xYgyZZuw45OWG2hE5ZsY5Z;S$uIY`48y-V7Ie<|RG$F~(d5&VQxZENy=U3t--QXbja~)e3vu4n;=5953C$ikwVyGd?MP}tp=#YM1f?{ zt+>#<(Rl{%{yW!*6R(6Tht93 zJNjT_1RxqDC$#?Ik|gu@aTWY>gMgccjVUbKRFEgZz%XZ&P+MT6$Z`-kzpL0XN2UIGAr z!)g5_Q2=8w8z*8FnMQuOx^Bh+=cZD;dz-f;4^i98_>~T^c{M0f@)jW&A#tf~-ik%L z^(>b6U?F+JCg+)|d+I*C>wJ!ps%Zix{^Dq6kqIc%L{S^sjU_1s@`m1^Z7EO1_5Wn|a; zgk4>`id-~)ob|lI8~j-k&G^yaf6u-lZHhes$`_r}viqM?slh&E?CM5YuRVV8!HR%n zAT>ggf@8lbIgXc<$0`KyR;DTzOvNX5Rz|SMfDKTx)MDPR2aL@rgHc_kffuy3^{7NY zwb*;aCw7tTt@APSkuwXE1yix7t#VhlUkBlZ`l5aRL7jzh`P8x-SlpnEU*S?)eU)1?t@lHkp;X{KEAmx}`qCH$FG}Ax@$gpA~-D zYL6YGkD0v|T@lionHJxGGWEC3M0<4T`U{*AX`M@)HYaqjL^CF=%9(%0VZQECU7j(9FrdC^-9Xjr23@70{lw(e27U zzU|dzjqM@Pzn{%9T`DnO6>hdw7V7O|Oy|XsT-Wffki$I5Jq^9V?hJ-@YbtJh{OM^01=A8C4io@}V+dirtX z+BHn`di?t$s534~e4u{sYk;34>6ED#TfSX5hnGjOp>pONZ<4|cpEHATY=@+) zeQ6Go!b)M9jnkZj98VgR^(01Sh)D8TI}5__|Xz)uC%;#bt>I(Tf?5D zv%*VVBbSDd!f=$p5@AGO6|YI5+VA+vDLG@6d1NGA+#;_1@I*bIz@406L8oY4PbK}3 zOtgq2Ml5aYpqQ^7z-rovNspr2>>ZqoepkA~e4R?y$hC4P(JOpSsla*f-f&W>+Dc}Ch{QfR_ z{}{_GyT=_ZUP6GSozwrdB$nTn1SVbsuKGVL$s&$LJf&uGDtp_m;sA-!iC?_r&EKi? 
zdKDu* zZf{|@^fu+4!7JGQ{EA-UD{6jY1b(Mh`Fb)KRO=S_`)zg~wFnGX0qXkhuQTEQ?M!lr zXX5sQkS5kc6r7FZ{z=_iWkJMJb{b?Euzd$fj1+%~bmRQF}xb#KA-&yznmxZ4ps@G16 z$oxVK>2C~tNFV$ZrwPa36&^5Rg@@K}ywC6T0G&Kap0~GJRlMSLkNnP7|I0dNfJl8a zd@6G+xdclrN54JE5?Zdva&KFWpHA$Fp9baE{RM67Q)R4tZSLm0Z4}%qb6gsKwrEEZ zEYJCXsJtpNe$LQwZh8sss#i7^)~@-@)deDjRp8AKKbgx-azplKML|fPU1m)9-)Z6r zeo&KPM3_%F&T5PtE3xE9=Ueh}WzJVf^@uK~Dy6CEJ7VeBJZ@t*$%jpgLB3iF*un>nHx`zE8D}Zy73klaeHnL{(o?#+ERcX>E=E({X3?AdKBEK$(j|RK#4zcnVn6P;s3F0^h9LRk3{3QHO# z@lQ9b?AcBBG!`qH_;pz~cb|Rt{`R-`+5249SP%Z|>9^C-JX@Vn*)hYtl=f;8*Uqv9%SvUx35gQJ zB`rvVR)u64fzb2(RaEK{N?cmUo%+OF7YKZ2%)M}QvDxYUY+rc%cX*;G$vU&t9~`&O z%K!%&+V43hpiV?Pe%OWAZ0(^1k!Q)Ec?BL(o!quQ?muEk4!h6*=~{{$kWi zT+1tV?cVk#(k{Fp{H~SN>1)Is(@A?3llzhQ*1Yo&Qm-LX)?P>A2>a*fc3?6Sw)9Y% zJ8DT5!j2>+5phsgi%%C|m2N|va4XIUgp>S&*@iR*~b{Z4d;F3}qQ78CJS!r+t%V#W7krzDO7BqHBc6>^Mn zgge)-QscDG4F$AtbdO$2ToVxRJE|b5^)*4_WA6Y?{+5#+Zb(lypqwm+L>7Jhc|HrZ z*doXF286U^PoQjMP)#=+R5(O+#Xv-XKq7a_Z8Bqy>vaVx>_8o~H0%Jaw1Ygq;(KlQ z*(Tkn{G>OG%f@5B3`dI}?#KNr+rF zHUeY_mMWe9(OHS6E z9-4e^xY^_j6CMvM77org{7QDZwSQu`bfF(Vl?enNsY$KFLvsl)(5UdPpIztyG&)rLt?EPdV@B5 zSJ##1N@>I6O9!R(pI<2014pGekZXSVFO%nDogs#Yz zQEU0x055Uk_@TZ>wH2RV<3wwsJJ^Q>utLW|1MPOK#O!4M`^&>U1oH)qykyTTE5E90 z5@x=cM#~NvG0iW;f&;ws*FU6Eu+*; zQgxwMbYM2(lDczGLlCrr^LH^s?&06)svmKPm>Y+1ilM6kt zv2;MNRX;#n0fA^)b$9{6w;n050u3qe%(=bXrV)LFm#S3xi4b}U$AkKFGMI2_>%Thn z?N~m8uuat|vje8Sn4Ty9NT#RvG}MWAhU@#vO0_|+4etjC31XcvfjJVgfL(asDy6r$ zs**7JtiQsJ*Pdd>TjFQSt9v;jpu|o%bH%WKLb9al?}Z~l1;kv(3Sc%j)M=vc6xvrn zQ-p|nASAm-rQdqOX)wXJFqv&Np3QT-uhhcQ)-U_F*J0}8n{gN&u{INShQ-A##{p(= zBTheMjzjj`PWT*jlq3LOC2gI=QBl&U6%*}-lFl6^0Ri2|k zshxh_ zOQ6wmpMn|25iz2qU~D#Gtu{iVq{5ai_z2iII>7?< zkp6JsyD&18EAp+P-(sM(y(B$S$iCK*BOnif_Yy!Ck9*XRjNBe$G7ui>L-RNSBA52O z#1iBs)z9P*GsN;-6bvfViX16c&L?(t;F=rv(`R2atO&K-QBoM*H{6 zN-RwIW)giwrK;iZ6F#lXP^YJ1bCSV)SkbEK5pa7Isffq5@zWOs-uJ+3AyX^CyU52) z4-^-{_mDn{qX9*86ppUeokrXbG@2w%^7byqUhZ@C&YL2^QR_~l>aY?9z5FV1Ds88N ziz^_&YTap&`k7a`X69A?N3R^36Sj;YT&lkiIH&G1hCp2Y$sYgv`;FpMoo`1!7T5_2 zH1KxP9I8?J76lt@loNT9``c6#_s|?MQvSTh_clM*+A)K?)EU3hcQZ8yeUvVgNe0f7 z(qv6jrQ}Lb>hk1)L5Rzojm!@%N=j@1R{<)xM;M}2G+pVU!&F5()6=$_{vSG+m7W5w zX1tQFW?ZGIY9dPPk$P#2;V2;hT!oJj68*NLRn;A(ClY8HvrtNGBhWy#rIfQieXXjl zrnE~J&CwvGr@MgJC0}7%(jIluN{OA688yG3jUsFSCoD+=D~O><^U^j=npW#(8v+nT z;>H=$_61XIZ-L{9Z}7Vn_uakU5-+b^XSgkV(ioW1Vujd14U0t~uj+ zz)SPA-ps}ZO}!ZmXzmQ=tiT>ok8gob*xmUxvbTD&g@NJ6JAqS|0WR&-lz-;OCVAzI z&Q!2U7eZsj20$N{if%EFN(l% zGE2V@eZ9=T+$-WTFsgaKoQ#`qoqQ6^`yeiDc6g>o+6X`g{A25{O@}YIW%&Dnvp`qw zM%{lL#NEiEMif;1=% zdY_GP&ez{}?-+O7KQ7~U-h;e*@3r=dC+3{bdZ-|O0}qD`2Zci6$=sAwLZMEkqEP4# zSeWn^yBOSX_&;phn;H%%6z)0Xe`u)ZiRa;}@8&A%j_Pu<0){qLtolYa2F9$eR<>|8 z3MC}!YO8N(VeCj_U~Fn`EljsoRYyl-ZX`^nc1@05&Q`+M%>1Uiy|J>pyo#Z_g(1HY zou~+nkgEVZz{=QBpT^b7(%M15RhaJhxdQMz@@F z@UwET7_zf-(D3rJ>hl?K^IqfP=4YniWas2!W9MSyv0_ z8UDS_*2&)TxN##xHe*X;D`RU%2e_8w@3ppOHjXw9W;Xx9qyK*WFB*W+m6QA1#=qMa zE33avI5TsY2OB4QLt`mt=q5e#Hnsv1_Qv{-HufquHkN

5zG2VZX-0!KcE(DZtJxz|GGBpX}_v zE|s$}GB6;Ld-#6xNNAzN7wsd9jh9fQgO0l|H<(xs|@DF`KQm zDILw@521`g|-#MkXfQCi?okhQ@qLAFr6VXvBpMpzyEChzdZuK9_wOe3=jQJ zru)}r4mKu^F8cPy*G*xx{u>L54Z6>U?8RSi!uGFU^7y-d+P8l$2V;Qz`qvP_KmM9B zV{5Qq_F$;+jh=6z&}3i9NM2WQO}#!P}6^5^GJhSeEG_xp$JP(9$!?{syB zgrR@0ijZRX^TnnhN$Q`!x2^sE8~!_^^wfar-sNaCR6}E*8X^p!2OHC0NF$15xxh(wgwEOaA)dj+2XH*Dq9WHE|L8e|idDjiCKr7D1t#oAv`k zLv$A}p1g43feU`o+N|YN zE#=(YoO-btVM9YhQv|&mvpC_eAH2t7)>U|WdwVEfU7sW3jg1YuD*|(`Vs#ommRJO? z@mh}fW@j@;ZaZ~d8>SHzrDl4DN+*!Wj|7)JfP^D@8 z5(L5lpFbPS&d;ZpmO3jq-%Sv3V#<(DVsbuR_T63hk#xG+fdbRB@}uoBucu~adVf}9 zTaTso23!}qyX)GSz)R5F+`PNl!DiRNQpT5kFsP=RT3&wM*U5H0jIH)OGU`Ppc%flo z`rKquF)=|E4)dQIaM_4T-)3hw-Jtfm=;`zGXs1z+{nOFT)D3HE*5?Tcgy+ur!=N8- zMEZPp?X^>LULAi(?Q_`o)lTcjmLH3;1zlE~QYC^>sQV>kSy@?eT&DP(TwL1L znf+h$ot9LINJ-(XR7X}nTI4k^^ra^a`D~AxqN(=~qu?vtNGBJhyGiN8x;_)e!&NY7 zXWQG`YeafrQa$(PVi{Oi0^jM0+1qn=i~jVyXWxEFf$7dOvwJl$sP(=1WW)Xpxn48w zkVlUsO-wH5)@*Ag*T(#~SftMUneyIcbW}iaFm>LWH=)EVSijap6m`vy{Uz5=4^HLV z&)pjL+L>TrWo@zX{zeXN_w!O-owNE*>6rC7Osr&H8>%l|3Bd=eRXxTXNw?W*h~BE@ z1u11UbtegK98CSx*&PkSBexyacEOgS^neAs`#Fq_-FloaGgNZCN_U~mrYg(G?>a*KOjZzQ8hglGh74nD_{lT}rI)VG_Rv^sCxYMw9+xzzoE>m8? z=dQ@7q@|r;t6o0=d--@Wx}IM{`F`!sG+djkX^2xEzUP1kt7s$7=LmOjaFEaYkgxW~ zzJ+X(0C7o4N%`KF1h4Hw>xwy&<%ue_UH5~PvV-Cz>GXGVvAPjpUhcVneYN}JV9oAx z6E=mA>lr5}r;&Vpbks1qPv)yv(`jL>4OMHkVU58AGffZAAy1!loxwxZQ@V^#7t8H< z;*v1iwpsn0dy_B=t7|1+URqi@`Nh0Hfk=qc(hw~@J$jY%O2a$aF@{w; zl=!MD%D4CatyfC4{QMebHHO?|1$Hyi_rO$P;o|x$Wg!b&Qa0s${sug4YoyX_yxb1i^sE)LXPcUuQ2C|4 z`!m!(L--xOh^6=Ob$Jg9>#D2ANmw7S9jw~%zzW7+3J3`JzE5%W>eY>I!8Mx8mob0t z_4rU|CT9)a&(^6Si;s`zb6hwx={8R}T``u6hT^lI#S;0kYdG$GumZa<0JcXWzr*^t zLk}9N)N=HL1wP|H3|6G2R0akv37?R6`=waaf>%uSl^3oJ#@CI$et3oQZD$w$bRn4+gy9W%TEPMKg$^!rq54%&c4m7 z_3JZh0$;IM*pUc55Z)O_M~xJlM>qCZ<`l5jd{>IasQA_+N{vd??3W=vb@|FK^Seo} z71gflSlHgyCMGK@n^!bj!E_NVQ*%Q_RW+@)cGCPXZD2s9EZ$P|T+z++KC_fQvoq(; z&Dfu@OPFkV%EZjv^Z@;of`-P!{Csv8;1sxJ*iKkgQxm8DBSt@^R#X=cjhD;l=qM{S z0nDXlj>#X}ZT7mzsTn=nklSha<3}76t7$v=GT3^Q1#R=Wz`(%rmC~`q%5jK6joAMhVpI3~FCBeEUygY6YCc5z@-soZ7>gOi(^ny?JnDlEK=u5O=} zbdelen(W`zDz$da&5e2&!15>lJggrgw)wu$XB~#i^z4>d$+xMWKgx6ubW83Nqn7F? zjtUSTLqwzpmk5)1M^Ny8$UAax*dC|$T)&>`6m#|2vuF5IW14p=$QHEH`#vSu)PCJ4 zFE!A;e*Hnm^$D-9V6rh$o}2AgtXr+!!2zNr3Q+qKhbjCH&Aj!NLS4STirp@sG>+BA ztUqFQ(<-xe+O4gqxZtzbp^KP3dVc=V6?G-V#x5>ygYh!h_}q*d@2NDs8keJ0CeP5z z&BOD#pp~i-{Nz2Vw{R=;sv%Z79ZheAXMWKx!DF@J6bA7d9;B>&*2CU7@%xrA5QqKgDTi3%= z^DRd?W8;w>9=!1ciG>h1o$T|2zng!aqN%mD(TrO>y-)9*A4d6V<diZMTEVZ|u9OUm=craq$iGsWhH$0j&E2;O3JNHgLFCnkbv$w;^qVJ_M~ey! 
znnS6QujpA34`@qEV>&}90B%ozX($J}EeSmxxPL97aW zTh%h>JHd%0YxJubm)5b?%iy&>%TLNize@%GQNfw_2IcI`y~@aJtASIFWfNm zYWJCPt2b|nzDgJS6V1Fn-})u)&(4*cb@`iH5JpWE{E6iyCB0i8v_Jgy^otAN)qUkN zmv)>6f^kn})R(?Pr-YBzD=fzjQKRS#$ zX#34*xXx$uOYSE~NM5{q+^Hy+N75&K_8I+#bNub+<)<&_3w~SZTTm~?qP!j3kgdvv zi(+`TkjP80A57)P$kHXs|1w4@H2&q&Umt$)Pshq6w!S1NSdY>XC0CL^uH%=KL=!V9 zP;U|}m&HV35fF&#+`UVDRr3t^&&?1{QR>v4iBnQ`1zTu=C%!y9r!Rl&gr7fJGm_sxME8%6uGBlu{=Md>leT_6@-f?pIb;DsV(}9S^yii*?cU z-zWUOT7{)qMZzB68z1doI{4OWDDdVjjhAsUe?DSlbhN_q194ON9I;|TupC)6PR#gW zdXi{7IOGD(>QAxbxz7fV9sKB&4V@#WyDemjn+cY(M3(N>V zs^*dKc1RFhciw#YT7yVRN@2oB)F?cBp|kygbr)HlmgP;xFd{A)WMY+&x`5kL_}!WNX>5zdsMSE zZ?9lg99XN<(c5hf>mE$77_*xw5)sqReo258kweZ?ANOo<&8zJ+rex=HjJ+uOG}9=9 z%Z$eGk!-xN>2m8$JW(({twGCa8ou)@7Q>pYK4%$m38ONbn8n+h`50iUU}Su9nS4-P zP4Q(w#gSEKd9YU2;Y7>!te~F-mNHF4p6o@8(?+x-_w-kh7z!@t>~Fy-D+}{ zxKQoPQZ!M7 zzNm<>sE!GaX5D0E^3~jRreFyYwp>bRqw!Z858*9Bj}G)7hWNIJ<=)tq?ax4CA|`5X zeteA=*{4JeU0I7=yay#&PYaI&$BEZ(MERnJU|!eFnG>`hlL+xFjeb) zB+qZWIlZ@WIXW%>OyL)kIAtZhhmkwlG2TDAXz6f_j?IH&GK0$+CMwRPb6uu`9wlbW zE*pIr4Fzp>erV=1#b+)kP*|QKa5FX*Gx<9+rr7vF=9@;Sql2U;_b)d;Ku4jzH9ef1 zU+2UnR7^2Aj%W8JRdoqa`4=zkIo6euU~Il&GAU4G(36)dG2wT-dB{A|x+54i#kFy` zD-ob-%Uk4I zG0-=i?_+Y|qK4(mr|ow%jLr&)7Iw=AABDv<8*N&9Fq&|ZA@{DjgoQAmez^xPgVjYI zIzi{o%!6lxLz+9H_lb&bUb{W~V?X+jojiL(3xD_wkpNY;y z4EFbNNr_uhbAO!=H1X(H6Ghz^xG2nJD7~26BiASJ;sT_sPi0BS z8I3oMN#U+589_hx^yYsasg9qs}PiZtHk|=5WOD$QqBv!oq*?#{sQ{WdMgD zdC;ZXam^yPCH%Pyv(=}g|Gd*n-GYX<%^Ej|lH!w&UuOy}|o5YD%9m}*bE-As%s3Zzaql=4< z?kjb_mZB0GJGZ|S->e{2(DP*WSB6)L)3e|g;kBql_hC->!D3Po6BTHU7 z`Ww-o7eXhh0BGr-s^gtpZ!eLMB)Vxt5++sc-(iIEU0+|Hf%vN+4FlloG;d`Jfq?wI zZ?wZ^dJjXy<~QfqyORkz^7ZAH-WO{!i2kH_XH$oF{xmz}0TV6Z>~9AsUz^-Tt9b@;EYYe=W>t2`B3H5m=LsGVuAdW~< z(+=!z4%2@aYTvEzku>R!yRfi62HhM`J9k;OcX)6xFx1ZSc&OS8H{ah1m~^4`gRE%Z z_ZG$s|9t(EIIP=}Cyg{&W^Y7_Mm?vMH#anlGjw70`&G3D+k+L6F4}fkX?k&L zABs1_Iz_ekI2eyKH1ZR@1(>I&hnC9IPJ||VljTpb(52M^;DPL_G+&bMgK7#jdzYV9 zROG5DF!k}M@@#!Z?t(ifwGzYmpIQtpm^T2hid)Kk9ev5zf0XXX4c3e zrmYRFiw+5>TArsDDt#Y*81niGv-m$k;aL|0l2krHs{dKs+InnfkMH^K2fBTF`aB7= zvA3&o$3{mVrlimm7~a&OsgB`!@bp;_CYXLHecIX92sCMWR6Xp|#28EmPobg_Q<+<3FWcT$WMk5|EW0^yU& zn$LayDfoh@?<5yvnN2Ww{nYV7DFzWIUK%9eJ>lDFP!Qr`8-E&ghKkC&`pp|A)nXc= z{GlAJQ}voEfYaJlbmwYUDJUwUp#UntR8ms%e6HUbLCNR59JhW{G5;b=Fy(j!FhY;>y6RmFH*-cKD=QQ#>=wq+`)BL` z2tMwnW4r2K{qm&+`lZtyvFS%PKIoo@JL-Re>*FvxMkcq9=6i0{nSrfT1|poe*2rM~ zOUCs-znmf5Y7M|X_Z?v0R0E0xmn2E_YzUeFNCq6K{BX15)N(n1gIM_ZL49Tcs&9!k zZww9$=)v^vcG-B-kE)9L%UM`ly>2lqzO~u$I5sxjYU_To6YZbHdAv-sjGqh-(OFK5GvHJDU7|aIWF_T#Xv~U= z-B%CxZ14#cY4mS1;Fu|yVZR!@9~B#m;3!<(eCW!V@+$>#MdtmNA#X<2r%8o*K7S>6 z5TgJvqWExCceni0r%yk9dgtFz370klrhoYS#v+hc-iI4E5{2B9%xJ$vHrxZG8Zv$i z6yLW_E@38dAPQ#Z=B7sq3`4@g8av~;MkymPpDu0<=kWTW1Z>LDC_7@%aHNt;@6OI$ z-|CfkoFw!xLMp7c)P3u5PZ|T0-!Eq;X)2HJXffnEVIc8$65B~NJU0X{HOoR8J3XIt zsyS7H{Z1m*(VM&Z-wVQ$=-3F}oT3iMe$_lb9~|;JugMY{k`@f^JE02nHzESsjm~=K zDrKdpFah{ucerZ@K+g*6#LRW)DU(%7ta!Fa4;G+U!GBTWsdlB2)&R|?pi;B+C($a=Jd}w$6 z6 zz{u28PCr# z1t^EjxD9|10;V9O-`T!`0vj9q=Iz@K8uhS`cL6T#9T=GX&IMx#6v@ks3_4a;eAGHX zTb}^|bkbZyK&iIJ1CtfLy&J^ssdyZ|b-m=MAEY@TR4Gq1g6KrQ^#|QKr_%F2jJu<-M8^3?; zFYws0E-o(Svm}SbF`)OoT^@1rd>3O?KV zEOi5(1VvTVAg_b%hfh0yTC(U{7Hj6@kAL{!w^n{zWTDTkn<4WPgY!oQmaTx!Jwmqot@q4P7^L3o5^Jb z-xJx*1%7^hRAno9H6A+x#3_~+-oJmpJ`*Xr|K0V4+5P(sJ5xSurDrsCbt9qa?iDv1 z8=F=F(Zf$Fz9;0n7)C}%zxH=t)PA>Cjqc{g-#E)O#GC^S4 z`4=9nPo6vp*xFtmoyo5QbQ-rg;_>4%2yF>AxUIXpWgtsAa6abo<4?VK*(D{!h@Y&$ zhb0e|A}T)qVP~hT>{%|%mdU8s#XDw&Od2YGwf+9ut& zaf6m1Ub&Q2mt(b1MTPB2W@tUYo%O^VpVrXUbtH*7imh+0&~jCN#la<%Zx4S(hT z3fOpbFLMnvaoBIWcl{Z6AuZaFUmM`m0SSwJ={IIz8wbf}o->IqDmf3^&z|8dP*lNQ 
z6@nRZQc{5mC!-p&%^H_mIP&M-PSmU!pv$hI9}uB#YHa|Ymq1<4qA3v`#3xS{xebM8?byYGc$3gU}KO#JSeW80SGtP zsfGfui2QYzO(UQc?S1Jrig zO_|Yn!y<#+=4tm{#Wd#EBx(Y#J%KRukKBNgg?b(AZy{(30v|t5(3h8aa7kp+MPq-D zH#mghals`bJeh6YX>eOyCY{GlP4JPEQPo50N6T7nqb=rwNPrl3>s7;#ql3VLSW8`; zsylRd68Wa{Yr7C;Q$fM^`E#=12f63Yo=t2y^Cl;UIOdAn^VzLzM+iIdS#p+7ahKse zU1dBt3JM7LlwH(qui09=YeL?pb$VNSy(NNj$x+AO&yPY=+sS9es$1xrZ14#pRMS`u znhS5eARg=ci7GTy#sx0V51Ux1GZd%$U&^3Rfnnm`d`-XJcVcuVnMvf&Z)y=Q@DZJT z_6#i^?87ly?fIg)af&g^M2UhAg@R~f!3&?Q2r_794;hJeB)u&N$Hdi& z75t=<+Ty*OY}gvXPjuOR?8d!&z}Or1_4cBQMjwIF;Aw0udcLy{Iluiyq|Yh=g=_|n zY|bO!d3K%eKse2A(1e9TL>ON-yiu)mU%lXNa?xjQA-*wBoUDoo%}SqrlH&RE#! zmOD6g3&p=H$j!|iUTUBGnwh-1?Vg{Noh=Fc+zvnL;Az$%pW(E4(?NMh{v{Erks9(% zZ^xylgTq>7D~nXiMaBXM^$IB_=Xk$85|TBsH(ayLx-RXs6Z? z+B=H5e_88#PhuQ{JAz$8P}O(Fbm~?skH}-*B9r%X_Ta7!fBrb)^W4psz-53n3NCh1(R zqsiu6@a@Ta0KK7w_^g;IK%Ut9)sa=Ja)eX^v+&X*!EFpG|978Co!*P_*?#iv9c1U^ zRANwoPNe4M;`^3dO|$xBqjrz@5p+mgT%0DhgAn`*L@STTdGR5%Z0qSEtu&+*be26o zE6mAx7HB<`FPJU9M_B{frly&*xS^%rPVu?071v?9ldF$wfwqOTCGdk8H@;!gC^t7Z zU-sIahYug#{I1W7i8{|^avJirMlbw&5r0To8buDzw1ly3f8{XNHf)GrF^O3YudtBi z)DW$7;S3|ysN5b-{slUC_gsuxYW;U`OaZ{yIq_xX(Yn0HlY~9C`NAhuG&C5AcvP64 zP1)ohG>G`WHD3XBNC9XG>P*-Zx<<-ulG(K=Pze9ou^kxt%{P){P~4W#2fMT zUK7Om#5dUkgyiFdgsYx;N?Gg{L(IV|(NCTzC@Z6*7?_yg7AZ$ZZrRJ{-n@C!+uQ34 zhzI!M$t!MFATo>$4aI?U24n#OM11n1M~=bb4>faw;6Lf~>!zkxKse(E!La0w8;u49 zDIlY4jie?Gl_d+8HehgopwkC2o-k>5htu3g~+<3vKkDJ%1hwJ3UoU z$J<+Edu1#*F7AAr5od8}XwW#$$c8QD7I&$OThg7E!{d}2Ms^#O?xNXrL8x?~0-yL1SH zAg2Ls1S0YyBq9ocK2Zp}6TzxKHb8dy9#jYK#;bmIZr{5RP8JBaLB52YH-`i&p1Zct zz0Yk?^bOu=X=xDRQ6XS9zX%neT|gx2mnLZRDTh`IrKF>`bjkuS+SFsfe$qQeG5umx zGwr#aJ7PwgP9$zp7{I2nU%9m9QSS`+zFac?Yn8{bPsN6YzHIT`ZJ4Ty=Mw(MT$2mC zb3a)&xy^C@G(=n1As&%HW8&c%nl_mvhnNsXd-wkR`)9p}Ahg{!wfHCMOJn} z63Sw_p*CTLy^7apWn~41h8+h72XVCEhV-kj&t2wwbd4B>&!L;YH8nvS0E`GKVThb? 
z^h<^7KRWdcrO9UYW6zKMxK`#fzCGx*oF5T^pXp=?QQuhdC?uzR?%x?8as{t@0vcFcjiZ1t2|vU=D$eryu>nRNl4G>KG^{d|k6)*(o ztx}}g150|Jyr!+kU&2nicq&uu--wP2e1 z^hvLsK*x$Fq3^?o6JTrf8-w^C5zpRsuhB8Pkgmcc%yb^mm2}AVVfa&1Q@yM?(ubG& zhK7P*>=6Os(zOCgE(QiH`0mMb4x5QeCsZ~@I;7Rn zWA40&XZcieCL{`&&9M!7M?|jTKeu^b>v&)I!eN-=Gfx>=`Ld0SoX~yn?&E7bX#i>J zm2XI!6c|_R-)|!$!;}M57{_Z@|Fw>r)LD%HS^wq3lRsyuu{6u@Te6vptw%m7;>!Tv z$Km-!7SuLR<0Z9@=zd;ei%mSq`7nIn<(1v0zg0xEE?V$Hn z_=l?8WT`qf9v;~B;tmq28cIO{0Wv@lj=be8TOWmlJXS$_Qw0McDJ@MWAW%7l!BGG4 z1S<~@Nnc+d`le7LgjvH++Rp7CtOYZ&;DIYTDY~s*r-cp#eM;p|Z+Jib&h8-n_Oqb- z_|VJ#8lh3Vn{n2Pvejv8zRje|PQIX?+nJ&_n-CDol03j=b4`?Z694p8v6B_`;o{pP zD)xzKo9;w@kqX3{0bu|VwgI-7eD>m+BOYcB4n!mHSj#4`vzhsMZrgne!a_$z&>*m1 zxq{%A^Vu27^;5rhM@DjDYow(gBuVY9lqm?WqkMB4`pWI7<{#g` z43YVewHqa+!qHxj!t&%q=1Ss^FV!1CcaB-^hE-QdNviy{i23_NLTRaDb_-`3zVrm& zOMif1p&hY}d_J`u)|{L*2mpkt!T&a_-Fv!m~HE;8Z_^C?8Aqxh4BBc@M2 z>_y#z8D3N)3CFyiR}++yYi%7g*U|T5Kv|@HS}x<$u#W!+>i9>}0TZtDenWYuh;STr zoQMTT*@9kKNb3btln#H*Df5lMk82H=cuS3Ru=Z?`AQdu1RBc^d7%(=NYDjMyOk0K%~CDpSht*DI+9vaNU3Qja!Ty+kfm3>3wg zboq^nbo8Yw%+3>J{iSs>nSnxL&hMtl)U7xhG-k77e!K<=_jhL$YPQMrxK99uRAky{ zzyB5e2jpw7Mb!h2`E-U946iL>6`NXYW3>lE50xHvyM5CupCbXjGR`>Uk7y!evJhBK z$1k(>pBh;V_6iJr`vlIVf_^9?OIaTqm%Q)~mxgG8V%lRQ zGWx_!*wr0g53*;zdPhMms>dG^^70pbPUpi;MtJx&6HBKI|9z5RV1nVPTbBgpyZnEo z2J79rO-}abS?w`%(RWe+s5eid#x_#2p28nYuo15?4pZ=4{AaEROiqWmKE|VL3dmuv~ z`nCAoWK7ugM!a@_Bo(l(A^s)v>SSbO&(_3XAX2C~EOcVOf2U~NBk$thhI$HxHcp~w zX4|jO_J0*Zgm$44!DkL9J%G164K#cq`6n@BXhQOL6O6k_E;MXx!65xg3M2KXrdDS4P zt|}_lZ_0xJ#NNH2}upW_$^j1+i#H)5GIvhB1kn4H=R3s8bhZ1393-R~>9oO5Z*@94z4z}oy@PQu1Z=7a zQ7P`<NCm42k!*sc1z8<>90f+=#m;wdgry}MI^M2SM4BIR40;B0(yZeW((K28U zKu`t)o8Q!Wz`3TnTzLF< zpU-Rf`RhLyx_|<*blZ$n>HSMc)=+vc`1%egbf7X2N9Ff;U;iVWvd+Z80Ms0X+Dn!A zYNUG>rHc}AVk3UB~qPSsHEL8 zapsN{5TMwmw*4Q-rDic4)Lh6$3k*dj^0G^ZDt{`bH}!ldsr&I)isEBb;FQVb!S}fA zDd*_W&dcV6F=nd67v}(FI2Le(RO>zG#)d)qEtndqqgG(jzjgbZ=xrI9Iz(ZIr0#Da z5;+CKAf=;r8WoP*WQ=AjV&JtdUUgW# zm$yy-OqS#sPLhU(FX&Ofbm^E9diPYlpeF27s0S)EASQ-{Ld5et6n8ZPXcB-BFlm4l zLw@KrsRm>Ul8n92HFwtT1Hx=;SeLRENFah24~wBp+lECJ8tm4 zBeOP>cZ}bONMzuT&0=055}h7?#zl#>mJGXBQ)d}3maht+#CoqJyI!lWSlJo>JclTV z%phm?MG8znczz1n21qP$-D&;Pb)+@}YV<~mObH&kszP?Dmb!skouhcpQl|cQjTYSX2~_A}zy*v%jSDz0pWpoU z4I}^$;E5z@A%~qH9Lcx50CI$_PySRcgHhLb#J5HaforPSX-HGTaQr160c7CqAp5vu z^st7XjvEHVppYcQWyo)tQRByZ+Io9NWqtb`rcWhI20?nHmPtDqd8+$&MU?Mk2eJwf zJK*m(YS}W~7`y@eSc4@`mG&CQ)A6UM0Z5tZ+wU^t{tQg}NR3C?h+@j%{Q{`tfC*M# zW_Z$DbLMN$32gwn@Tr7CfHa6KUDKg4?BJpM*+1nYDr;{3IQALsZb9q*#4YimA6iSxp{lW$Ye=s4R628 ztNpl!)*m?atws9$eCNh5u@^E`T)>#k&d$AkM z%of`b6&JT)Ujx(#T&M>k{93%JQ;4kJd3odkr*2pEhZ|5028Ac@0!ej3ker>HTV+>6 zQ8DVJ*3s#wATZI_$Wo!3Z2X$xp$yZWSNDSqmbjFIL(z?c^VjY_Z7lKL|C)KgNk>?r zhm9*SHfGa!_8j`$P!3kbmTKGnNT{l+R@mdEyx_FgSYKw1igR@E7@mDLVPyU;^JgMA zFNbZ8*`&IELPKdcXWPvPb<+;Yw(vV-PizVZr0jo7PFnbDML3o{rQOst>qmHn4h zvoe#275OJ=5rwx7$Z4p(chOL1pw@zzCTkG`RoycrR1e_EgH>lw5o1A1NT9*byabuVd21^IyqfKAhqZ#chY_#7yx2GihlFvO|g6T5^cwU z@frZ&d07Igbe4;HS_%wX5wNs)6Yy#YcXxrO+R3*{i;MMuT48m7H5AJ-hZM;AzoYRK zbzW9KA1;k(r$B46u)Hj3X2yWje*z>6d~~K^Nogt0#5^pS04I0B+7(TwS7jEI`8pmq zA}9#EE#P91f4Hsf0Tvr&fp;d^hR(!O-;>!r6VXGZ3RqfT<6rq+9_c# z5)_SdIav>z7QOZPhZ@z31}L$6MnXg5AM4`tfi%OaSra+w^(}l0;G`CZ?gy7R2$5{| zc>=Ad`+$Iyhgw-$#M@l)gPF(!&o|p5rtLkaDu4XkP>b<4{k%sgtejl9*H%M47gkyOf+P&8C=-L-xc*frS0#F9JQL7X1wMetcP1q20C0YL%@2QVJ=K-HR=tLW;o5&17y zbmXeO^->C04A;z&2INdrOAAdsjG)vYVAPTX)2+;un+%pfL{JbWI70+A%(9(ZJcLO= z@O56^bXbcpxlpq#e)DEXegyCmh-QJCN7=(e5QIdjgZCA|_aJJx7sBMeB_nf-LNK{} z;a^pBbnuHtneUN;8@ATMU=l4Y$Q z?w7EhYIbq*yp!hyC0WSv2l8A$fb)jiRPNpdEE0{&v>U2qB`qx8HWsKb5p$D;OT@|@ zEY+l@q9I1e!!zw&TVmqriB@X*g|~S~{K^1ZVZ5}AZ`#ReDnHs 
zTk08v)dKIAUR0#)`T|HS@a~ZkXyTdu{6r~`!h@Is4;}-9gw?vZupnk@o1dQq4im@- zN*nbh9#TJ*Bm$%$TaTE|@2uk>&3J!XKjAvBTv|UG1Z?D4iwbHr50+2qt z1GWwP83Ogr_7!*<_q-6s$9N4uE3glUL`z&hD}@ANC^KFB{j3Jv5|>oZJBf4@8#4#Kdvj=1**y53EzR_dI6WORT4qItMKPlz*sQ<&4b_ z%F{5|3>Pn77I2Kl@Z~UU(OVqI0+<(}3kwW`6_(%T=0YWO5I0DCJ7aa=oD0;4_XhN@ zpa$&E=QCMwgkNs65(;7xM!mFqjE}>IxC%IyqW0$x4-lPEL0Pf5BFMDP0^>r$q6JY! z@B2&O%aNkOT{keLP?&kybE;p~TWR{uE7iMqpV@W;jkd<^tt=^d^23J@D0N)5ZI;31 zBq6sCS#RUPyf{0T_PeTYqj%ql=WNbG4eHLcgg1gfBtRT6`||ao+Txds4?kFEkOM6} zb@+uBpXEE8_QC-NE=+r**qWo-N^UkH3a<}fEe{{>fL5rvwGfCJO$FwDw>cE*X0yNX-J) zv`ZAo`0Zado_5Z4w8a2t1|d(L z0dkjSDF6eRK?+-EvcB`9{n|5wxL+Wr85R^AoQQae%eT!!SsCeMWXO{cc;ORj*pPKJ z@2bgCF6dsc1F^vZI?|H!EDzuF@Dc9-I8dn5MfhZ(WWe9Oe)FaQ zXxvm4ra;z5rPiL?6Qd`hyvleEtSu%+NqrCB{n!_TMcVQ0zG$PaZl!>GdY#nk+CyQ4 zE5G&1Pxi7yoLxcYmdDI}l5Su`%IfmA>$p}ykQJP-PGkv#8PI5R~2Y8DX3nX8(}SpdWk zrphp+PC8E@GgN3jUZ@1w1AM4Bj1C74NeMD$WnnEC)e~!GoF?OX>>bseh8u(P$wZH zTV$UE-*OhBT0|QS4h9j9f@D_$s`TMtF(`XL&b)wbfnNXO#fw<;o6rs@s>MKB0DL9{ zu*~DG7A2a&e!)i-lw2l{6qwCHK#J%@ zB*Fqoxx>SCMMeE9V&kRXjC$@WLT~|7g$A)4+zVnLq%u9X0~S|i!D~x?*k3@u+wQLG zgAP$FOC=5D2$wEhfPYfI!)jMI<6hbJ@n-2;HgNv&zLIoxp zyniqWF+@ReP=f)W2AVo9g)rXb2@sAzXo{+bU<>{vB;1uvN=!W0`{728s^Q`81_Jq@ zmiTRBM^!0q%gajx=+a>bCv(E_*2%r%brUYaOzEaid%c_DLe_P-O zDdM>ZqXfa`ZNX3*05X8YfBhp|FhOxbZiW3k`uh5OaFzrj8mU;h_3X(LKM=Q$8IzWR zF;0ZTE`Wn|SUiEHu4O9soV$H(B5l^5AC~gt7cW-rmoI4TWK2}KR8jT{fENR?8ooA8 zQBhF=WYwTvYg%y?f!!9z?~u{I7yrL_`|@}!+ivY!LXl)j5lTWbkCk~Wk*S2tWGFM4 zGZZ3ZNTxD`L}to7&x9oNJX5CIJad2R!n5D~+k3y?Z-4)Me^k%IeP7pko##5&I@Yn) zd0bF&hdwJsW##dWNFais0`I3#oVxu05(O?Uu0i|1A9$XUz-1J*wHH@r@_Xt*5yhg& zg%!FZIDi3>aj>R;cl%$5kF+x6T4iK7?#JN!$2B|-wfTo;fKIW0P8``0Ue|q;rYA4! zS)|e9LYEK^N4@Y*AsED>yCf*2lN2;AAg^CDbyaPEZUQ9R><0=B@F?^TASr_9xjT}Q zNZOwQZ6gvA*l=;N)+gM#juo3nzX3l;!gaPwy9XM9314ro`+jm8Uz`S2+FjHyv1KaV>Eb?TW`a{f`fm}RDCb0IDmu<-LpP% z5B5Gdgktc-2LhmkPKyiBO#}G@!}IF_0Rgq(5VT@7EHmv?vka}Q zI3VKE+o+GE&lz6LgmzSIlA>xTG!ld8CRjeMvc==Tkp`!?^stUN<|0Z8Bs{b3u@?VGcVVs>@O}vy|!*&#}_{?vI89=$=fwI@7 z|4|RYHQduTrGh~&@>5n?+MH{up@^>Rp#&qXa@qQMaaurJl*sxu{u z%M+_+v4n_&vfX+vgG1XIGY6n@~>z5r!Xz{FN_{4=jLI2Ercz*=YUfX2KP^|6PggpT0Kr@LRUJ zaMb_k2t|3rZpdSXt{Yh>)NS^P!P}@JUv;SE5FgcYwDH~0LcrCx@^2RT2MffgYM%!C z4ThculZAedpHTf1ZsT$*g?b7_)Qb;>O(b)|B748m_U~T*f2pi_TiK>MueN%$!u8ym zY-$K;5D^C*W6t{@aZln&C0`a!%&$4SEJO(eF;kz9>cKl2N$4aD&kj9*I#?*bas43sD@k&G*mx)uh+&kqjTs(yJ|h8%;>r3t z3MH%j4sD-$*gTC2roBz?_Fwv@Aq|>TQ8_wo|6e5t=-7LW@>iaOr^ohwag8wLfifIt z0#b&Pxi{0`$>P>FS!FsM!J?h7{Z5~ODXhE+Yb$-73Qj;ADV#M@Y4Lv^ZV8-LzT;DK zfX(Lr_B|JPgtrx3cJ{aDb-uGOggOk!a)!45`FP300jXJ`{;63_XD;&O8Z^8p)=@Xl zu)`!uzllpSb>DA-30D3wz9~mijI*A<(_psK~aPHHiyJ@0;?=&0Vtk zv>Cmt6aRC83+I0j95{n}ftySB&6~eE|E}v2+SKRUg+XkA$MyCnWif5#6Y3ftd(_FC zoJ8;VH2mipooo!jc@*lE@J=J+F2UoHZoP{AAGucXmPb`IK0y(oP-O-kBPEMLlcAZx zBJG6+=ayMR>>?A zD6c@n0I&#fio)+mYQU|zc6FjauKWiboZesYXGfu^d$i}4-S)zoM)ke~&ShewPP(qV zQN@lP$TVp7CDuL^VR(If`KYdE7yhL;ka_!1Z@}fuvED%H;qHRlfs*e#$@4ykJ!oYs z_(0zSSqG3}7N#ch-xQV(%6Cf-`%;fLhVZ8fj2q}iTxP!^v1AamUJ~Y&o}_A~;ZV z;LPOb2enaYXxQ*jzHV_SHL(1YAvO-HiT=r(z(t4ia12I#y%HN$jDKgM-YHrlH7dY% zTCHTt?z!mtjEF>zi$h#~XjeBK$oxI%S*EhwnnYR<>5r1;ZC~hy%g}Kr{Pvg-(dyZ+ zp*{EAB4%Jw0t$0tm;?UT`^!PWw8CdP3aw4{7X>1_X2rq|&w(VlF!-P3iu(QgSFw}r zNL!G!=V${B5?4TO2HL~oD|RiiZ13{^?`j|avj75Rg4^brI?AJW?3rMN+1o-rdZXo3 ze=O8t%Z(|mPo0_-wzmM;^KbX13Q%z z645U+a{ZyKZP9vS?u^Y&*bTkHefzlS;3o`;#Lzc#w#zgNdOqODbz~Thtj)^TOXTE= zdutsYh<&pL% zFgvQ9(QtTmYlKumcRWn>tfT)ajq^_CtWR!JvxJ0zgh|a!mwb~9?$7TJ1(urry-ES1 zOF)9?5`@G>O-$&OuPL_-em{)}yP-xKg3o<^V`cEHneLkM4`?9bJN~=r2>|o*Ns=6c zbHp-~hatx6OSkmkem(r?jB`h_0)?Liedk2}@fOuFt7YoJAkA2lB8+~-sE=wz+G-}aM|l8{m(3Q3(ol{jwUBa{p8 
From 75f90cdb6b3cb37af43552e063fca7012c38d638 Mon Sep 17 00:00:00 2001
From: Mikhail Titov
Date: Fri, 25 Aug 2023 02:03:37 -0400
Subject: [PATCH 111/171] fixed Agent_0 scripts

---
 src/radical/pilot/agent/agent_0.py            |  2 +-
 tests/unit_tests/test_agent_0/test_agent_0.py | 29 ++++++++-----------
 2 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py
index 99a3e7d18b..86157988b6 100644
--- a/src/radical/pilot/agent/agent_0.py
+++ b/src/radical/pilot/agent/agent_0.py
@@ -419,7 +419,7 @@ def _start_sub_agents(self):
         for idx, sa in enumerate(self._cfg['agents']):
 
             target = self._cfg['agents'][sa]['target']
-            bs_args = [self._sid, self.cfg.reg_addr, sa]
+            bs_args = [self._sid, self._cfg.reg_addr, sa]
 
             if target not in ['local', 'node']:
 
diff --git a/tests/unit_tests/test_agent_0/test_agent_0.py b/tests/unit_tests/test_agent_0/test_agent_0.py
index f9c8139007..83c38d7cb0 100755
--- a/tests/unit_tests/test_agent_0/test_agent_0.py
+++ b/tests/unit_tests/test_agent_0/test_agent_0.py
@@ -68,9 +68,9 @@ def
_publish_effect(publish_type, cmd): global_control.append((publish_type, cmd)) def _prepenv_effect(env_id, spec): - return (env_id, spec) + return env_id, spec - agent_cmp = Agent_0(ru.Config(), self._session) + agent_cmp = Agent_0() agent_cmp._log = mock.Mock() agent_cmp._prof = mock.Mock() @@ -97,20 +97,13 @@ def _prepenv_effect(env_id, spec): 'rpc': 'hello'} } self.assertIsNone(agent_cmp._control_cb(None, msg)) - self.assertIn(global_control[0], [('control_pubsub', + self.assertEqual(global_control, [('control_pubsub', {'cmd': 'rpc_res', 'arg': {'uid': 'rpc.0002', 'err': "KeyError('arg')", 'out': None, - 'ret': 1} - }), - ('control_pubsub', - {'cmd': 'rpc_res', - 'arg': {'uid': 'rpc.0002', - 'err': "KeyError('arg',)", - 'out': None, - 'ret': 1} - })]) + 'ret': 1}} + )]) msg = {'cmd': 'rpc_req', 'arg': {'uid': 'rpc.0003', @@ -150,9 +143,10 @@ def _prepenv_effect(env_id, spec): @mock.patch('radical.utils.env_prep') @mock.patch('radical.utils.sh_callout_bg') def test_start_sub_agents(self, mocked_run_sh_callout, mocked_ru_env_prep, - mocked_init): + mocked_init): + + agent_0 = Agent_0() - agent_0 = Agent_0(ru.Config(), self._session) agent_0._pwd = tempfile.gettempdir() agent_0._log = mock.Mock() agent_0._sid = 'rp.session.0' @@ -160,7 +154,8 @@ def test_start_sub_agents(self, mocked_run_sh_callout, mocked_ru_env_prep, 'agents': { 'agent_1': {'target' : 'node', 'components': {'agent_executing': {'count': 1}}} - } + }, + 'reg_addr': 'tcp://location' }) agent_0._rm = mock.Mock() @@ -195,12 +190,12 @@ def check_agent_task(agent_task, *args, **kwargs): agent_0._rm.find_launcher.return_value = launcher agent_files = glob.glob('%s/agent_1.*.sh' % agent_0._pwd) - self.assertEqual(len(agent_files), 0) + self.assertEqual(0, len(agent_files)) agent_0._start_sub_agents() agent_files = glob.glob('%s/agent_1.*.sh' % agent_0._pwd) - self.assertEqual(len(agent_files), 2) + self.assertEqual(2, len(agent_files)) for agent_file in agent_files: os.unlink(agent_file) From 362debdcd04fbcf8edf5df9ac9a206944991403a Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Fri, 25 Aug 2023 02:06:33 -0400 Subject: [PATCH 112/171] fixed RMBase tests --- tests/unit_tests/test_rm/test_base.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit_tests/test_rm/test_base.py b/tests/unit_tests/test_rm/test_base.py index da15827844..2063bf4a2e 100755 --- a/tests/unit_tests/test_rm/test_base.py +++ b/tests/unit_tests/test_rm/test_base.py @@ -223,7 +223,7 @@ def test_prepare_launch_methods(self, mocked_lm, mocked_init): # launching order not provided - rm._prepare_launch_methods(None) + rm._prepare_launch_methods() self.assertEqual(rm._launchers['SRUN'], mocked_lm) self.assertEqual(rm._launch_order, ['SRUN']) @@ -232,14 +232,14 @@ def test_prepare_launch_methods(self, mocked_lm, mocked_init): rm._rm_info.launch_methods = {'order': ['SSH'], 'SRUN' : {}, 'SSH' : {}} - rm._prepare_launch_methods(None) + rm._prepare_launch_methods() self.assertEqual(rm._launch_order, ['SSH']) # launching methods not provided rm._rm_info.launch_methods = {} with self.assertRaises(RuntimeError): - rm._prepare_launch_methods(None) + rm._prepare_launch_methods() # raise exception for every launch method @@ -250,7 +250,7 @@ def lm_raise_exception(*args, **kwargs): mocked_lm.create = mock.MagicMock(side_effect=lm_raise_exception) # all LMs will be skipped, thus RuntimeError raised with self.assertRaises(RuntimeError): - rm._prepare_launch_methods(None) + rm._prepare_launch_methods() # check that exception was logged (sign 
that LM exception was raised) self.assertTrue(rm._log.exception.called) @@ -267,7 +267,7 @@ def lm_raise_exception_once(*args, **kwargs): rm._rm_info.launch_methods = {'SRUN': {}, 'SSH': {}} mocked_lm.create = mock.MagicMock(side_effect=lm_raise_exception_once) - rm._prepare_launch_methods(None) + rm._prepare_launch_methods() # only second LM is considered successful self.assertEqual(rm._launch_order, ['SSH']) self.assertEqual(len(rm._launchers), 1) From 6ccd216cf13c60c155d46bac905ff10a3e881d15 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Fri, 25 Aug 2023 02:31:24 -0400 Subject: [PATCH 113/171] fixed TMGRStagingInput tests --- src/radical/pilot/tmgr/staging_input/default.py | 4 ++-- tests/unit_tests/test_tmgr/test_tmgr_staging.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py index 07d44b8fef..6bc4cefd87 100644 --- a/src/radical/pilot/tmgr/staging_input/default.py +++ b/src/radical/pilot/tmgr/staging_input/default.py @@ -287,8 +287,8 @@ def work(self, tasks): # j.exit_code) - if no_staging_tasks: - for pid in no_staging_tasks: + for pid in no_staging_tasks: + if no_staging_tasks[pid]: # nothing to stage, push to the agent self._advance_tasks(no_staging_tasks[pid], pid) diff --git a/tests/unit_tests/test_tmgr/test_tmgr_staging.py b/tests/unit_tests/test_tmgr/test_tmgr_staging.py index 700dd5f357..5d89cb3522 100644 --- a/tests/unit_tests/test_tmgr/test_tmgr_staging.py +++ b/tests/unit_tests/test_tmgr/test_tmgr_staging.py @@ -56,6 +56,8 @@ def test_si_work(self, mocked_si_init): tmgr_si._session_sbox = '/tmp' def _mocked_advance(things, state, publish, push, qname=None): + if not things: + return nonlocal global_things nonlocal global_state global_things.append(things) @@ -81,12 +83,10 @@ def _mocked_handle_task(task, actionables): tmgr_si.work([dict(tc['task'])]) for tasks in global_things: - if not tasks: - continue # there were only one task per call self.assertEqual(tasks[0]['control'], 'tmgr') # advanced is called 2 times for the provided inputs - self.assertEqual(len(global_things), 2) + self.assertEqual(2, len(global_things)) self.assertEqual(global_state, [rps.TMGR_STAGING_INPUT, rps.FAILED]) # ------------------------------------------------------------------------------ From 5e026e658a5f7c5b2fc65f0445c431aa7dad9aac Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Fri, 25 Aug 2023 02:45:11 -0400 Subject: [PATCH 114/171] fixed Agent_0 test (py36 exception) --- tests/unit_tests/test_agent_0/test_agent_0.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/unit_tests/test_agent_0/test_agent_0.py b/tests/unit_tests/test_agent_0/test_agent_0.py index 83c38d7cb0..3db5074133 100755 --- a/tests/unit_tests/test_agent_0/test_agent_0.py +++ b/tests/unit_tests/test_agent_0/test_agent_0.py @@ -97,13 +97,12 @@ def _prepenv_effect(env_id, spec): 'rpc': 'hello'} } self.assertIsNone(agent_cmp._control_cb(None, msg)) - self.assertEqual(global_control, [('control_pubsub', - {'cmd': 'rpc_res', - 'arg': {'uid': 'rpc.0002', - 'err': "KeyError('arg')", - 'out': None, - 'ret': 1}} - )]) + self.assertEqual(1, len(global_control)) + # format for the raised exception might be a little different based on + # python version, e.g., py36: KeyError('arg',) | py37: KeyError('arg') + self.assertTrue(global_control[0][1] + ['arg']['err'].startswith("KeyError('arg')")) + self.assertEqual('rpc.0002', global_control[0][1]['arg']['uid']) msg = {'cmd': 
'rpc_req', 'arg': {'uid': 'rpc.0003', From 22702912e2095f1cf442666c40f92accd7bba32b Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Fri, 25 Aug 2023 02:45:47 -0400 Subject: [PATCH 115/171] fixed Agent_0 test (py36 exception) --- tests/unit_tests/test_agent_0/test_agent_0.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit_tests/test_agent_0/test_agent_0.py b/tests/unit_tests/test_agent_0/test_agent_0.py index 3db5074133..19cb273b71 100755 --- a/tests/unit_tests/test_agent_0/test_agent_0.py +++ b/tests/unit_tests/test_agent_0/test_agent_0.py @@ -101,7 +101,7 @@ def _prepenv_effect(env_id, spec): # format for the raised exception might be a little different based on # python version, e.g., py36: KeyError('arg',) | py37: KeyError('arg') self.assertTrue(global_control[0][1] - ['arg']['err'].startswith("KeyError('arg')")) + ['arg']['err'].startswith("KeyError('arg'")) self.assertEqual('rpc.0002', global_control[0][1]['arg']['uid']) msg = {'cmd': 'rpc_req', From f567c28f8bafcef127ca82c1dffb3760ab06a2ee Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Fri, 25 Aug 2023 02:51:25 -0400 Subject: [PATCH 116/171] fixed base example --- examples/00_getting_started.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index 689ccb960e..5ccb15cff6 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -5,7 +5,6 @@ import os import sys -import random import radical.pilot as rp import radical.utils as ru @@ -32,7 +31,7 @@ # Create a new session. No need to try/except this: if session creation - # fails, there is not much we can do anyways... + # fails, there is not much we can do anyway... session = rp.Session() # all other pilot code is now tried/excepted. If an exception is caught, we @@ -42,8 +41,8 @@ try: # read the config used for resource details - config = ru.read_json('%s/config.json' - % os.path.dirname(__file__)).get(resource, {}) + config = ru.read_json('%s/config.json' % + os.path.dirname(__file__)).get(resource, {}) pmgr = rp.PilotManager(session=session) tmgr = rp.TaskManager(session=session) @@ -54,19 +53,20 @@ # Define an [n]-core local pilot that runs for [x] minutes # Here we use a dict to initialize the description object pd_init = {'resource' : resource, - 'runtime' : 600, # pilot runtime (min) + 'runtime' : 15, # pilot runtime (min) 'exit_on_error' : True, 'project' : config.get('project'), 'queue' : config.get('queue'), 'access_schema' : config.get('schema'), - 'cores' : 1024, + 'cores' : config.get('cores', 1), 'gpus' : config.get('gpus', 0) } pdesc = rp.PilotDescription(pd_init) + # Launch the pilot. pilot = pmgr.submit_pilots(pdesc) - n = 1024 * 1024 # number of tasks to run + n = 10 # number of tasks to run report.header('submit %d tasks' % n) # Register the pilot in a TaskManager object. @@ -81,7 +81,7 @@ # create a new task description, and fill it. 
td = rp.TaskDescription() - td.executable = '/bin/true' + td.executable = '/bin/date' td.ranks = 1 td.cores_per_rank = 1 From d5c6aefa412d5f28c4172f3e21fd99607c37bf84 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Fri, 25 Aug 2023 16:23:05 -0400 Subject: [PATCH 117/171] fixed pilot publishing --- src/radical/pilot/agent/agent_0.py | 6 ++---- src/radical/pilot/raptor/master.py | 3 --- src/radical/pilot/session.py | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 86157988b6..8862f620ba 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -297,13 +297,11 @@ def finalize(self): 'stdout' : out, 'stderr' : err, 'logfile': log, - 'state' : state, - 'forward': True} + 'state' : state} self._log.debug('push final state update') self._log.debug('update state: %s: %s', state, self._final_cause) - self.publish(rpc.STATE_PUBSUB, - topic=rpc.STATE_PUBSUB, msg=[pilot]) + self.advance(pilot, publish=True, push=False) # tear things down in reverse order self._rm.stop() diff --git a/src/radical/pilot/raptor/master.py b/src/radical/pilot/raptor/master.py index f021ca5635..a3a7c16da5 100644 --- a/src/radical/pilot/raptor/master.py +++ b/src/radical/pilot/raptor/master.py @@ -307,9 +307,6 @@ def _control_cb(self, topic, msg): rpc_res['out'] = '' rpc_res['ret'] = 1 - # inform client side - rpc_res['forward'] = True - self.publish(rpc.CONTROL_PUBSUB, {'cmd': 'rpc_res', 'arg': rpc_res}) diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 6019cc561e..20167b5f54 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -735,7 +735,7 @@ def pubsub_fwd(topic, msg): if from_proxy: # all messages *from* the proxy are forwarded - but not the ones - # which orginated in *this* module in the first place + # which originated in *this* module in the first place if msg['origin'] == self._module: self._log.debug('XXX =>! fwd %s to topic:%s: %s', src, tgt, msg) From 11ea090b896779298313d1f8af83821d1e3b3829 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Fri, 25 Aug 2023 18:13:45 -0400 Subject: [PATCH 118/171] fixed `pilot_state` in bootstrapping --- src/radical/pilot/agent/bootstrap_0.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/radical/pilot/agent/bootstrap_0.sh b/src/radical/pilot/agent/bootstrap_0.sh index c1891e320b..c93adb1cfb 100755 --- a/src/radical/pilot/agent/bootstrap_0.sh +++ b/src/radical/pilot/agent/bootstrap_0.sh @@ -220,7 +220,7 @@ create_gtod() | cut -f1 -d' ') printf "%.6f,%s,%s,%s,%s,%s,%s\n" \ "$now" "sync_abs" "bootstrap_0" "MainThread" "$PILOT_ID" \ - "PMGR_ACTIVE_PENDING" "$(hostname):$ip:$now:$now:$now" \ + "$pilot_state" "$(hostname):$ip:$now:$now:$now" \ | tee -a "$PROFILE" } @@ -268,7 +268,7 @@ profile_event() # MSG = 6 # message describing the event optional # ENTITY = 7 # type of entity involved optional printf "%.6f,%s,%s,%s,%s,%s,%s\n" \ - "$now" "$event" "bootstrap_0" "MainThread" "$PILOT_ID" "pilot_state" "$msg" \ + "$now" "$event" "bootstrap_0" "MainThread" "$PILOT_ID" "$pilot_state" "$msg" \ >> "$PROFILE" } @@ -1565,7 +1565,7 @@ echo "# -------------------------------------------------------------------" touch "$LOGFILES_TARBALL" touch "$PROFILES_TARBALL" - +pilot_state="PMGR_ACTIVE_PENDING" # FIXME: By now the pre_process rules are already performed. # We should split the parsing and the execution of those. # "bootstrap start" is here so that $PILOT_ID is known. 
@@ -1573,7 +1573,6 @@ touch "$PROFILES_TARBALL" echo 'create gtod, prof' create_gtod create_prof -pilot_state="PMGR_ACTIVE_PENDING" profile_event 'bootstrap_0_start' # NOTE: if the virtenv path contains a symbolic link element, then distutil will From 5f96ecde6d69de4498dc060a9187929658e86d72 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Fri, 25 Aug 2023 18:26:02 -0400 Subject: [PATCH 119/171] fixed `pilot_rpc` in TaskManager (uses corresponding pilot method) --- src/radical/pilot/task_manager.py | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/src/radical/pilot/task_manager.py b/src/radical/pilot/task_manager.py index ff6cdd3666..dd092255de 100644 --- a/src/radical/pilot/task_manager.py +++ b/src/radical/pilot/task_manager.py @@ -660,31 +660,15 @@ def _control_cb(self, topic, msg): def pilot_rpc(self, pid, cmd, args): '''Remote procedure call. - Send am RPC command and arguments to the pilot and wait for the + Send an RPC command and arguments to the pilot and wait for the response. This is a synchronous operation at this point, and it is not thread safe to have multiple concurrent RPC calls. ''' if pid not in self._pilots: - raise ValueError('tmgr does not know pilot %s' % uid) + raise ValueError('tmgr does not know pilot %s' % pid) - rpc_id = ru.generate_id('rpc') - rpc_req = {'uid' : rpc_id, - 'rpc' : cmd, - 'tgt' : pid, - 'arg' : args} - - self._ctrl_pub.put(rpc.CONTROL_PUBSUB, {'cmd': 'rpc_req', - 'arg': rpc_req, - 'fwd': True}) - - rpc_res = self._rpc_queue.get() - self._log.debug('rpc result: %s', rpc_res['ret']) - - if rpc_res['ret']: - raise RuntimeError('rpc failed: %s' % rpc_res['err']) - - return rpc_res['ret'] + return self._pilots[pid].rpc(cmd=cmd, args=args) # -------------------------------------------------------------------------- From dc6c3068811efbd9ab15e85950098fd84b3912b1 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Sat, 26 Aug 2023 18:47:54 -0400 Subject: [PATCH 120/171] added base path to `Proxy` --- src/radical/pilot/proxy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/radical/pilot/proxy.py b/src/radical/pilot/proxy.py index 9e13d4cbb2..777545b03c 100644 --- a/src/radical/pilot/proxy.py +++ b/src/radical/pilot/proxy.py @@ -129,12 +129,12 @@ # class Proxy(ru.zmq.Server): - def __init__(self): + def __init__(self, path=None): self._lock = mt.Lock() self._clients = dict() - ru.zmq.Server.__init__(self, url='tcp://*:10000+') + ru.zmq.Server.__init__(self, url='tcp://*:10000+', path=path) self._monitor_thread = mt.Thread(target=self._monitor) self._monitor_thread.daemon = True From 08064d9516f975d7645a5467d3f4918c0ad747e2 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Sat, 26 Aug 2023 19:39:11 -0400 Subject: [PATCH 121/171] added termination calls in `Session` for `HeartBeat`, `RegistryClient`, `Registry` --- src/radical/pilot/session.py | 27 ++++++++++++++------ src/radical/pilot/utils/component_manager.py | 1 - 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 20167b5f54..9e65fce787 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -881,12 +881,22 @@ def close(self, **kwargs): if self._cmgr: self._cmgr.close() + # stop heartbeats + self._hb.stop() + self._hb_pubsub.stop() + if self._proxy: - try: - self._log.debug("session %s closes service", self._uid) - self._proxy.request('unregister', {'sid': self._uid}) - except: - pass + + if self._role == self._PRIMARY: + try: + 
self._log.debug('session %s closes service', self._uid) + self._proxy.request('unregister', {'sid': self._uid}) + except: + pass + + if self._role in [self._PRIMARY, self._AGENT_0]: + self._proxy.close() + self._proxy = None self._log.debug("session %s closed", self._uid) self._prof.prof("session_stop", uid=self._uid) @@ -910,8 +920,9 @@ def close(self, **kwargs): if self._role == self._PRIMARY: - # dump json - self._reg.dump('registry') + # stop registry + self._reg.close() + self._reg_service.stop() # this will dump registry self._t_stop = time.time() self._rep.info('< Date: Sat, 26 Aug 2023 19:48:04 -0400 Subject: [PATCH 122/171] fixed Session test (`close` method) --- tests/component_tests/test_session.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/component_tests/test_session.py b/tests/component_tests/test_session.py index b324e4826d..2249295aa8 100755 --- a/tests/component_tests/test_session.py +++ b/tests/component_tests/test_session.py @@ -163,11 +163,13 @@ def put(*args, **kwargs): self.assertFalse(self._session._close_options.download) self.assertTrue(self._session._close_options.terminate) - # only `True` values are targeted + self._session._ctrl_pub = Dummy() + self._session._hb = mock.Mock() + self._session._hb_pubsub = mock.Mock() + self._session._reg_service = mock.Mock() - self._session._ctrl_pub = Dummy() + # only `True` values are targeted self._session.close(download=True) - self._session.close(terminate=True) # -------------------------------------------------------------------------- From af173341bc785e64fdc3d3db655234ec1f52d3c5 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 30 Aug 2023 00:29:26 +0200 Subject: [PATCH 123/171] ensure bridges time out --- bin/radical-pilot-bridge | 68 +++++++++++++--------------- bin/radical-pilot-component | 2 +- examples/misc/ordered_pipelines.py | 4 +- src/radical/pilot/pilot_manager.py | 2 + src/radical/pilot/task_manager.py | 2 + src/radical/pilot/utils/component.py | 1 - 6 files changed, 38 insertions(+), 41 deletions(-) diff --git a/bin/radical-pilot-bridge b/bin/radical-pilot-bridge index 9b55b6edc0..3623b2d078 100755 --- a/bin/radical-pilot-bridge +++ b/bin/radical-pilot-bridge @@ -120,43 +120,37 @@ def wrapped_main(sid, reg_addr, uid, log, prof): if 'pubsub' in uid: d = ru.zmq.test_pubsub(bridge.channel, bridge.addr_pub, bridge.addr_sub) - sys.stdout.flush() - sys.stderr.flush() - - # if hb_cfg: - # - # # bridge runs - send heartbeats so that cmgr knows about it - # # component runs - send heartbeats so that session knows about it - # hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub, log=log, prof=prof) - # - # def hb_beat_cb(): - # hb_pub.put('heartbeat', HeartbeatMessage(b_cfg.uid)) - # - # def hb_term_cb(hb_uid): - # bridge.stop() - # term.set() - # return None - # - # hb = ru.Heartbeat(uid=b_cfg.uid, - # timeout=hb_cfg.timeout, - # interval=hb_cfg.interval, - # beat_cb=hb_beat_cb, - # term_cb=hb_term_cb, - # log=log) - # hb.start() - # - # # always watch out for session heartbeat - # hb.watch(uid=sid) - # - # # react on session heartbeats - # def hb_sub_cb(topic, msg): - # hb_msg = HeartbeatMessage(from_dict=msg) - # if hb_msg.uid == sid: - # hb.beat(uid=sid) - # - # ru.zmq.Subscriber('heartbeat', hb_cfg.addr_sub, - # topic='heartbeat', cb=hb_sub_cb, - # log=log, prof=prof) + # bridge runs - send heartbeats so that cmgr knows about it + hb_pub = ru.zmq.Publisher('heartbeat', hb_cfg.addr_pub, log=log, prof=prof) + + def hb_beat_cb(): + hb_pub.put('heartbeat', 
HeartbeatMessage(uid=uid)) + + def hb_term_cb(hb_uid): + bridge.stop() + term.set() + return False + + hb = ru.Heartbeat(uid=uid, + timeout=hb_cfg.timeout, + interval=hb_cfg.interval, + beat_cb=hb_beat_cb, + term_cb=hb_term_cb, + log=log) + hb.start() + + # always watch out for session heartbeat + hb.watch(uid=sid) + + # react on session heartbeats + def hb_sub_cb(topic, msg): + hb_msg = HeartbeatMessage(from_dict=msg) + if hb_msg.uid == sid: + hb.beat(uid=sid) + + ru.zmq.Subscriber('heartbeat', hb_cfg.addr_sub, + topic='heartbeat', cb=hb_sub_cb, + log=log, prof=prof) # all is set up - we can sit idle 'til end of time. while not term.is_set(): diff --git a/bin/radical-pilot-component b/bin/radical-pilot-component index 04979796d3..a652d2507f 100755 --- a/bin/radical-pilot-component +++ b/bin/radical-pilot-component @@ -100,7 +100,7 @@ def wrapped_main(sid, reg_addr, uid, log, prof): term.set() return False - hb = ru.Heartbeat(uid=c_cfg.uid, + hb = ru.Heartbeat(uid=uid, timeout=hb_cfg.timeout, interval=hb_cfg.interval, beat_cb=hb_beat_cb, diff --git a/examples/misc/ordered_pipelines.py b/examples/misc/ordered_pipelines.py index acc029c1e6..b3161f402c 100755 --- a/examples/misc/ordered_pipelines.py +++ b/examples/misc/ordered_pipelines.py @@ -66,8 +66,8 @@ td.arguments = [p, s, t, 10] td.ranks = 1 td.tags = {'order': {'ns' : p, - 'order': s, - 'size' : n_tasks}} + 'order': s, + 'size' : n_tasks}} td.name = 'p%03d-s%03d-t%03d' % (p, s, t) tds.append(td) report.progress() diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py index 40523825e8..69a33592b9 100644 --- a/src/radical/pilot/pilot_manager.py +++ b/src/radical/pilot/pilot_manager.py @@ -127,6 +127,8 @@ def __init__(self, session, cfg='default'): self.start() self._log.info('started pmgr %s', self._uid) + + self._rep = self._session._get_reporter(name=self._uid) self._rep.info('< Date: Wed, 30 Aug 2023 18:09:28 -0400 Subject: [PATCH 124/171] updated staging-tutorial --- docs/source/tutorials/staging_data.ipynb | 236 ++++------------------- 1 file changed, 40 insertions(+), 196 deletions(-) diff --git a/docs/source/tutorials/staging_data.ipynb b/docs/source/tutorials/staging_data.ipynb index 170da3c051..1153e09e10 100644 --- a/docs/source/tutorials/staging_data.ipynb +++ b/docs/source/tutorials/staging_data.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -81,8 +80,9 @@ "- `radical.pilot.LINK` - local file symlink.\n", "\n", "Using appropriate data actions helps to improve the application runtime. It is known that I/O operations are expensive and can negatively impact the total execution time of an application. 
Thus, RP applications should be built considering that:\n", - "* the most expensive I/O operations (`TRANSFER`, `MOVE`, `COPY`) should be applied for staging between the `client://` location and corresponding paths on the target platform, since they will be performed outside of the allocated resources and will be no resources idling (pilot job is not launched at this moment);\n", - "* task staging between sandboxes should minimize the usage of such actions as `MOVE` and `COPY`, and use the `LINK` action if possible, since these operations will be executed within the allocated resources.\n", + "\n", + "- the most expensive I/O operations (`TRANSFER`, `MOVE`, `COPY`) should be applied for staging between the `client://` location and corresponding paths on the target platform, since they will be performed outside of the allocated resources and will be no resources idling (pilot job is not launched at this moment);\n", + "- task staging between sandboxes should minimize the usage of such actions as `MOVE` and `COPY`, and use the `LINK` action if possible, since these operations will be executed within the allocated resources.\n", "\n", "In the example from the section [Examples](#Examples), we demonstrate that if all tasks have the same input data, then this data can be located in a shared space (e.g., staged to the `pilot://` location) and be linked into each task's sandbox (e.g., a link per input file within the `task://` location).\n", "\n", @@ -95,15 +95,17 @@ "\n", "### Simplified directive format\n", "\n", - "RP gives some flexibility in the description of staging between the client side and the sandboxes for pilot and task. Thus, if a user provides just names (absolute or relative paths, e.g., names of files or directories), then RP expands them into corresponding directives. \n", - "- If a string directive is a single path, then after expanding it, the _source_ will be a provided path within the `client://` location, while the _target_ will be a base name from a provided path within the `pilot://` or the `task://` location for [radical.pilot.PilotDescription](../apidoc.rst) or [radical.pilot.TaskDescription](../apidoc.rst) respectively.\n", + "RP gives some flexibility in the description of staging between the client side and the sandboxes for pilot and task. 
Thus, if a user provides just names (absolute or relative paths, e.g., names of files or directories), then RP expands them into corresponding directives.\n", + "\n", + "- If a string directive is a single path, then after expanding it, the _source_ will be a provided path within the `client://` location, while the _target_ will be a base name from a provided path within the `pilot://` or the `task://` location for [radical.pilot.PilotDescription](../apidoc.rst#pilotdescription) or [radical.pilot.TaskDescription](../apidoc.rst#taskdescription) respectively.\n", "- Having directional characters `>`, `<` within a string directive defines the direction of the staging between corresponding paths:\n", - " - Input staging: `source > target`, the _source_ defines a path within the `client://` location, and the _target_ defines a path within the `pilot://` or the `task://` location for [radical.pilot.PilotDescription](../apidoc.rst) or [radical.pilot.TaskDescription](../apidoc.rst) respectively.\n", - " - Output staging: `target < source` (applied for [radical.pilot.TaskDescription](../apidoc.rst) only), the _source_ defines a path within the `task://` location, and the _target_ defines a path within the `client://` location.\n", + "\n", + " - Input staging: `source > target`, the _source_ defines a path within the `client://` location, and the _target_ defines a path within the `pilot://` or the `task://` location for [radical.pilot.PilotDescription](../apidoc.rst#pilotdescription) or [radical.pilot.TaskDescription](../apidoc.rst#taskdescription) respectively.\n", + " - Output staging: `target < source` (applied for [radical.pilot.TaskDescription](../apidoc.rst#taskdescription) only), the _source_ defines a path within the `task://` location, and the _target_ defines a path within the `client://` location.\n", "\n", "Examples of the staging directives being expanded:\n", "\n", - "[radical.pilot.PilotDescription.input_staging](../apidoc.rst)\n", + "[radical.pilot.PilotDescription.input_staging](../apidoc.rst#radical.pilot.PilotDescription.input_staging)\n", "```shell\n", "in : [ '/tmp/input_data/' ]\n", "out: [{'source' : 'client:///tmp/input_data',\n", @@ -117,7 +119,7 @@ " 'flags' : radical.pilot.CREATE_PARENTS}]\n", "```\n", "\n", - "[radical.pilot.TaskDescription.input_staging](../apidoc.rst)\n", + "[radical.pilot.TaskDescription.input_staging](../apidoc.rst#radical.pilot.TaskDescription.input_staging)\n", "```shell\n", "in : [ '/tmp/task_input.txt' ]\n", "out: [{'source' : 'client:///tmp/task_input.txt',\n", @@ -126,7 +128,7 @@ " 'flags' : radical.pilot.CREATE_PARENTS}]\n", "```\n", "\n", - "[radical.pilot.TaskDescription.output_staging](../apidoc.rst)\n", + "[radical.pilot.TaskDescription.output_staging](../apidoc.rst#radical.pilot.TaskDescription.output_staging)\n", "```shell\n", "in : [ 'collected.dat < output.txt' ]\n", "out: [{'source' : 'task:///output.txt',\n", @@ -146,39 +148,17 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:28:46.387260Z", - "iopub.status.busy": "2023-05-18T01:28:46.386951Z", - "iopub.status.idle": "2023-05-18T01:28:46.398220Z", - "shell.execute_reply": "2023-05-18T01:28:46.397345Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "env: RADICAL_REPORT_ANIME=FALSE\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "%env RADICAL_REPORT_ANIME=FALSE" ] }, { "cell_type": "code", - "execution_count": 2, - "metadata": 
{ - "execution": { - "iopub.execute_input": "2023-05-18T01:28:46.434424Z", - "iopub.status.busy": "2023-05-18T01:28:46.434209Z", - "iopub.status.idle": "2023-05-18T01:28:46.562315Z", - "shell.execute_reply": "2023-05-18T01:28:46.561538Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "import radical.pilot as rp\n", @@ -187,28 +167,9 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:28:46.565283Z", - "iopub.status.busy": "2023-05-18T01:28:46.565052Z", - "iopub.status.idle": "2023-05-18T01:28:58.956760Z", - "shell.execute_reply": "2023-05-18T01:28:58.955740Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[94mnew session: \u001b[39m\u001b[0m[rp.session.three.mturilli.019495.0004]\u001b[39m\u001b[0m\u001b[94m \\\n", - "database : \u001b[39m\u001b[0m[mongodb://rct-tutorial:****@95.217.193.116:27017/rct-tutorial]\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[94mcreate pilot manager\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[94mcreate task manager\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "session = rp.Session()\n", "pmgr = rp.PilotManager(session=session)\n", @@ -216,7 +177,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -225,45 +185,15 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:28:58.960227Z", - "iopub.status.busy": "2023-05-18T01:28:58.959826Z", - "iopub.status.idle": "2023-05-18T01:28:59.079197Z", - "shell.execute_reply": "2023-05-18T01:28:59.077877Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], - "source": [ - "!mkdir -p ./input_dir" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:28:59.083906Z", - "iopub.status.busy": "2023-05-18T01:28:59.083229Z", - "iopub.status.idle": "2023-05-18T01:28:59.456334Z", - "shell.execute_reply": "2023-05-18T01:28:59.455078Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[94msubmit 1 pilot(s)\u001b[39m\u001b[0m\n", - " pilot.0000 local.localhost 2 cores 0 gpus\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - } - ], "source": [ "# Staging directives for the pilot.\n", "\n", + "import os\n", + "os.makedirs('./input_dir', exist_ok=True)\n", + "\n", "with open('./input_dir/input.txt', 'w') as f:\n", " f.write('Staged data (task_id=$RP_TASK_ID | pilot_id=$RP_PILOT_ID | session_id=$RP_SESSION_ID)')\n", "\n", @@ -288,7 +218,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -303,38 +232,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:28:59.460398Z", - "iopub.status.busy": "2023-05-18T01:28:59.460055Z", - "iopub.status.idle": "2023-05-18T01:29:21.091083Z", - "shell.execute_reply": "2023-05-18T01:29:21.090256Z" - } + "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "submit: 
\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0mwait : \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 2\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "data": { - "text/plain": [ - "['DONE', 'DONE']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Staging directives for tasks.\n", "\n", @@ -346,8 +248,8 @@ " output = 'output.%d.txt' % idx\n", "\n", " td = rp.TaskDescription({\n", - " 'executable' : 'eval',\n", - " 'arguments' : ['echo \"$(cat input.txt)\"'],\n", + " 'executable' : '/bin/echo',\n", + " 'arguments' : ['$(cat 
input.txt)'],\n", " 'stdout' : output,\n", " # link file from the pilot sandbox to the task sandbox\n", " 'input_staging' : [{'source': 'pilot:///input_dir/input.txt',\n", @@ -367,7 +269,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -376,28 +277,9 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:29:21.094369Z", - "iopub.status.busy": "2023-05-18T01:29:21.094145Z", - "iopub.status.idle": "2023-05-18T01:29:22.102509Z", - "shell.execute_reply": "2023-05-18T01:29:22.101554Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "['/home/mturilli/github/radical.pilot/docs/source/tutorials/output_dir/output.0.txt',\n", - " '/home/mturilli/github/radical.pilot/docs/source/tutorials/output_dir/output.1.txt']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Staging data from the pilot sandbox to the client working directory\n", "\n", @@ -408,56 +290,18 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:29:22.106080Z", - "iopub.status.busy": "2023-05-18T01:29:22.105703Z", - "iopub.status.idle": "2023-05-18T01:29:22.225907Z", - "shell.execute_reply": "2023-05-18T01:29:22.224513Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Staged data (task_id=task.000000 | pilot_id=pilot.0000 | session_id=rp.session.three.mturilli.019495.0004)\r\n", - "Staged data (task_id=task.000001 | pilot_id=pilot.0000 | session_id=rp.session.three.mturilli.019495.0004)\r\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "!cat output_dir/*" ] }, { "cell_type": "code", - "execution_count": 9, - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:29:22.230180Z", - "iopub.status.busy": "2023-05-18T01:29:22.229776Z", - "iopub.status.idle": "2023-05-18T01:29:42.278669Z", - "shell.execute_reply": "2023-05-18T01:29:42.277689Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[94mclosing session rp.session.three.mturilli.019495.0004\u001b[39m\u001b[0m\u001b[94m \\\n", - "close task manager\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[94mclose pilot manager\u001b[39m\u001b[0m\u001b[94m \\\n", - "wait for 1 pilot(s)\n", - " \u001b[39m\u001b[0m\u001b[93m timeout\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[94msession lifetime: 55.6s\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "session.close(cleanup=True)" ] @@ -479,7 +323,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.9.13" } }, "nbformat": 4, From 02decef861a9181349536714f1f3fc969a87040c Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Wed, 30 Aug 2023 19:13:46 -0400 Subject: [PATCH 125/171] fixed `task_pre_exec` configurable parameter for Popen --- src/radical/pilot/agent/executing/popen.py | 2 +- src/radical/pilot/configs/resource_ornl.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/radical/pilot/agent/executing/popen.py b/src/radical/pilot/agent/executing/popen.py index 0786025c06..43c67f5c7d 100644 --- a/src/radical/pilot/agent/executing/popen.py +++ 
b/src/radical/pilot/agent/executing/popen.py @@ -708,7 +708,7 @@ def _extend_pre_exec(self, td, ranks=None): td['pre_exec'].append(rank_env) # pre-defined `pre_exec` per platform configuration - td['pre_exec'].extend(ru.as_list(self._cfg.get('task_pre_exec'))) + td['pre_exec'].extend(ru.as_list(self.session.cfg.get('task_pre_exec'))) # -------------------------------------------------------------------------- diff --git a/src/radical/pilot/configs/resource_ornl.json b/src/radical/pilot/configs/resource_ornl.json index 71926109fd..af2afe82d7 100644 --- a/src/radical/pilot/configs/resource_ornl.json +++ b/src/radical/pilot/configs/resource_ornl.json @@ -208,7 +208,7 @@ "blocked_cores" : [], "blocked_gpus" : [] }, - "task_pre_exec" : ["export LD_LIBRARY_PATH=/sw/summit/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-9.1.0/spectrum-mpi-10.4.0.3-20210112-6jbupg3thjwhsabgevk6xmwhd2bbyxdc/container/../lib/pami_port:${LD_LIBRARY_PATH}"] + "task_pre_exec" : ["export LD_LIBRARY_PATH=/sw/summit/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-9.1.0/spectrum-mpi-10.4.0.3-20210112-6jbupg3thjwhsabgevk6xmwhd2bbyxdc/container/../lib/pami_port:$LD_LIBRARY_PATH"] }, "summit_jsrun": { @@ -257,7 +257,7 @@ "blocked_cores" : [], "blocked_gpus" : [] }, - "task_pre_exec" : ["export LD_LIBRARY_PATH=/sw/summit/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-9.1.0/spectrum-mpi-10.4.0.3-20210112-6jbupg3thjwhsabgevk6xmwhd2bbyxdc/container/../lib/pami_port:${LD_LIBRARY_PATH}"] + "task_pre_exec" : ["export LD_LIBRARY_PATH=/sw/summit/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-9.1.0/spectrum-mpi-10.4.0.3-20210112-6jbupg3thjwhsabgevk6xmwhd2bbyxdc/container/../lib/pami_port:$LD_LIBRARY_PATH"] }, "summit_interactive": { From 0adcc3558642532e75686381ddc5e71edb86e537 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Thu, 31 Aug 2023 13:44:49 +0200 Subject: [PATCH 126/171] fix msg forwarding, clean logs --- src/radical/pilot/messages.py | 33 +++++++++++++++---- src/radical/pilot/pilot_manager.py | 2 +- src/radical/pilot/pmgr/launching/base.py | 2 +- src/radical/pilot/session.py | 42 +++++++++++++----------- src/radical/pilot/utils/component.py | 2 +- 5 files changed, 53 insertions(+), 28 deletions(-) diff --git a/src/radical/pilot/messages.py b/src/radical/pilot/messages.py index d6395d9578..4bf8cbab02 100644 --- a/src/radical/pilot/messages.py +++ b/src/radical/pilot/messages.py @@ -4,14 +4,32 @@ import radical.utils as ru +# ------------------------------------------------------------------------------ +# +class RPBaseMessage(ru.Message): + + # rpc distinguishes messages which are forwarded to the proxy bridge and + # those which are not and thus remain local to the module they originate in. + + _schema = {'fwd' : bool} + _defaults = {'_msg_type': 'rp_msg', + 'fwd' : False} + + + # we do not register this message type - it is not supposed to be used + # directly. 
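# ------------------------------------------------------------------------------
# illustrative sketch, not part of this patch: a concrete message type opts
# into proxy forwarding by overriding the `fwd` default introduced above.
# The class and type name below are hypothetical and only demonstrate the
# pattern used by the real message types that follow in this file.

class ExampleForwardedMessage(RPBaseMessage):

    _schema   = {'info'     : str}
    _defaults = {'_msg_type': 'example_fwd',
                 'info'     : None,
                 'fwd'      : True}   # this type crosses the proxy bridge

ru.Message.register_msg_type('example_fwd', ExampleForwardedMessage)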
+ # ------------------------------------------------------------------------------ # -class HeartbeatMessage(ru.Message): +class HeartbeatMessage(RPBaseMessage): + + # heartbeat messages are never forwarded _schema = {'uid' : str} _defaults = {'_msg_type': 'heartbeat', - 'uid' : None} + 'uid' : None, + 'fwd' : False} ru.Message.register_msg_type('heartbeat', HeartbeatMessage) @@ -19,7 +37,7 @@ class HeartbeatMessage(ru.Message): # ------------------------------------------------------------------------------ # -class RPCRequestMessage(ru.Message): +class RPCRequestMessage(RPBaseMessage): _schema = {'uid' : str, # uid of message 'addr' : str, # who is expected to act on the request @@ -32,7 +50,9 @@ class RPCRequestMessage(ru.Message): 'addr' : None, 'cmd' : None, 'args' : [], - 'kwargs' : {}} + 'kwargs' : {}, + 'fwd' : True} + ru.Message.register_msg_type('rpc_req', RPCRequestMessage) @@ -40,7 +60,7 @@ class RPCRequestMessage(ru.Message): # ------------------------------------------------------------------------------ # -class RPCResultMessage(ru.Message): +class RPCResultMessage(RPBaseMessage): _schema = {'uid' : str, # uid of rpc call 'val' : Any, # return value (`None` by default) @@ -52,7 +72,8 @@ class RPCResultMessage(ru.Message): 'val' : None, 'out' : None, 'err' : None, - 'exc' : None} + 'exc' : None, + 'fwd' : True} # -------------------------------------------------------------------------- # diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py index 69a33592b9..6cd68b7146 100644 --- a/src/radical/pilot/pilot_manager.py +++ b/src/radical/pilot/pilot_manager.py @@ -330,7 +330,7 @@ def _control_sub_cb(self, topic, msg): cmd = msg['cmd'] arg = msg['arg'] - self._log.debug('got control cmd %s: %s', cmd, arg) + self._log.debug_9('got control cmd %s: %s', cmd, arg) if cmd == 'pilot_activate': pilot = arg['pilot'] diff --git a/src/radical/pilot/pmgr/launching/base.py b/src/radical/pilot/pmgr/launching/base.py index 2fb918abba..93822d1f70 100644 --- a/src/radical/pilot/pmgr/launching/base.py +++ b/src/radical/pilot/pmgr/launching/base.py @@ -225,7 +225,7 @@ def _pmgr_control_cb(self, topic, msg): cmd = msg['cmd'] arg = msg['arg'] - self._log.debug('launcher got %s', msg) + self._log.debug_9('launcher got %s', msg) if cmd == 'kill_pilots': diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 9e65fce787..a841312701 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -576,7 +576,7 @@ def _hb_term_cb(hb_uid): # -------------------------------------- # create heartbeat manager which monitors all components in this session - self._log.debug('=== hb %s from session', self._uid) + # self._log.debug('=== hb %s from session', self._uid) self._hb = ru.Heartbeat(uid=self._uid, timeout=self._cfg.heartbeat.timeout, interval=self._cfg.heartbeat.interval, @@ -592,8 +592,6 @@ def _hb_msg_cb(topic, msg): hb_msg = HeartbeatMessage(from_dict=msg) - self._log.debug('msg: %s', msg) - if hb_msg.uid != self._uid: self._hb.beat(uid=hb_msg.uid) # -------------------------------------- @@ -710,7 +708,7 @@ def crosswire_pubsub(self, src, tgt, from_proxy): # we only forward messages which have either no origin set (in this case # this method sets the origin), or whose origin is the same as # configured when crosswiring the channels (either 'client' or the pilot - # ID). + # ID). Also, the messages need to have the `forward` flag set. 
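        # illustrative sketch, not part of this patch: the forwarding rules
        # from the comment above, reduced to a standalone predicate.
        # `pubsub_fwd` below implements the same logic inline (and
        # additionally clears the `fwd` flag to avoid message loops).

        def _should_forward(msg, module, from_proxy):
            origin = msg.get('origin', module)
            if from_proxy:
                # from the proxy: forward all but echoes of our own messages
                return origin != module
            # towards the proxy: the message must carry the `fwd` flag and
            # must have originated in this very module
            return bool(msg.get('fwd')) and origin == module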
path = self._cfg.path reg = self._reg @@ -729,32 +727,38 @@ def pubsub_fwd(topic, msg): if 'origin' not in msg: msg['origin'] = self._module - # self._log.debug('XXX =?= fwd %s to %s: %s [%s - %s]', src, tgt, msg, - # msg['origin'], self._module) - if from_proxy: # all messages *from* the proxy are forwarded - but not the ones - # which originated in *this* module in the first place + # which originated in *this* module in the first place. if msg['origin'] == self._module: - self._log.debug('XXX =>! fwd %s to topic:%s: %s', src, tgt, msg) + # self._log.debug('XXX >=! fwd %s to topic:%s: %s', src, tgt, msg) + return - else: - self._log.debug('XXX =>> fwd %s to topic:%s: %s', src, tgt, msg) - publisher.put(tgt, msg) + # self._log.debug('XXX >=> fwd %s to topic:%s: %s', src, tgt, msg) + publisher.put(tgt, msg) else: - # *to* proxy: forward all messages which originated in *this* - # module + # only forward messages which have the respective flag set + if not msg.get('fwd'): + # self._log.debug('XXX =>! fwd %s to %s: %s [%s - %s]', src, + # tgt, msg, msg['origin'], self._module) + return - if msg['origin'] == self._module: - self._log.debug('XXX ==> fwd %s to topic:%s: %s', src, tgt, msg) - publisher.put(tgt, msg) + # avoid message loops (forward only once) + msg['fwd'] = False + + # only forward all messages which originated in *this* module. + + if not msg['origin'] == self._module: + # self._log.debug('XXX =>| fwd %s to topic:%s: %s', src, tgt, msg) + return + + # self._log.debug('XXX =>> fwd %s to topic:%s: %s', src, tgt, msg) + publisher.put(tgt, msg) - else: - self._log.debug('XXX =!> fwd %s to topic:%s: %s', src, tgt, msg) ru.zmq.Subscriber(channel=src, topic=src, path=path, cb=pubsub_fwd, url=url_sub, log=self._log, prof=self._prof) diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index 7aa35f34a7..21cd0d535d 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -324,7 +324,7 @@ def _cancel_monitor_cb(self, topic, msg): # currently have no abstract 'cancel' command, but instead use # 'cancel_tasks'. 
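        # illustrative sketch, not part of this patch: the control message
        # handled below is assumed to carry a payload of the following
        # shape (the task uids are hypothetical):

        example_cancel_msg = {'cmd': 'cancel_tasks',
                              'arg': {'uids': ['task.000000', 'task.000042']}}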
-        self._log.debug('command incoming: %s', msg)
+        self._log.debug_9('command incoming: %s', msg)

         cmd = msg['cmd']
         arg = msg['arg']

From bce3d4d1b0e74552207eada5eb25967e8218b3e2 Mon Sep 17 00:00:00 2001
From: Mikhail Titov
Date: Thu, 31 Aug 2023 17:49:06 -0400
Subject: [PATCH 127/171] adapted resource config for `csc.mahti` to the new
 structure

---
 src/radical/pilot/configs/resource_csc.json | 30 ++++++++++-----------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/src/radical/pilot/configs/resource_csc.json b/src/radical/pilot/configs/resource_csc.json
index 3491b8dc9b..3132e7991a 100644
--- a/src/radical/pilot/configs/resource_csc.json
+++ b/src/radical/pilot/configs/resource_csc.json
@@ -3,18 +3,18 @@
     {
         "description"                  : "1404 CPU nodes",

-        "schemas"                      : ["local", "batch", "interactive"],
-        "local"                        :
-        {
-            "job_manager_endpoint"     : "slurm://mahti.csc.fi/",
-            "filesystem_endpoint"      : "file://mahti.csc.fi/"
-        },
-        "batch"                        : "interactive",
-        "interactive"                  :
-        {
-            "job_manager_endpoint"     : "fork://localhost/",
-            "filesystem_endpoint"      : "file://localhost/"
-        },
+        "default_schema"               : "local",
+        "schemas"                      : {
+            "local"                    : {
+                "job_manager_endpoint": "slurm://mahti.csc.fi/",
+                "filesystem_endpoint" : "file://mahti.csc.fi/"
+            },
+            "batch"                    : "interactive",
+            "interactive"              : {
+                "job_manager_endpoint": "fork://localhost/",
+                "filesystem_endpoint" : "file://localhost/"
+            }
+        }

     "default_queue"               : "test",
     "resource_manager"            : "SLURM",
@@ -31,12 +31,10 @@
             "module load tykky"
         ],
         "launch_methods"              : {
-            "order"                   : ["SRUN"],
-            "SRUN"                    : {
-            }
+            "order"                   : ["SRUN"],
+            "SRUN"                    : {}
         },
-
         "python_dist"                 : "default",
         "virtenv_mode"                : "local"
     }
 }
From f26cb1244e231f9197fd950004b4346552a1781f Mon Sep 17 00:00:00 2001
From: Mikhail Titov
Date: Thu, 31 Aug 2023 17:55:00 -0400
Subject: [PATCH 128/171] fixed `csc.mahti` config

---
 src/radical/pilot/configs/resource_csc.json | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/radical/pilot/configs/resource_csc.json b/src/radical/pilot/configs/resource_csc.json
index 3132e7991a..c2c93ddbaa 100644
--- a/src/radical/pilot/configs/resource_csc.json
+++ b/src/radical/pilot/configs/resource_csc.json
@@ -1,6 +1,5 @@
 {
-    "mahti":
-    {
+    "mahti": {
         "description"                 : "1404 CPU nodes",

         "default_schema"              : "local",
@@ -14,15 +13,15 @@
                 "job_manager_endpoint": "fork://localhost/",
                 "filesystem_endpoint" : "file://localhost/"
             }
-        }
+        },

         "default_queue"               : "test",
         "resource_manager"            : "SLURM",

         "cores_per_node"              : 64,
         "gpus_per_node"               : 0,
-
-        "agent_config"                : "default",
+        
+        "agent_config"                : "default",
         "agent_scheduler"             : "CONTINUOUS",
         "agent_spawner"               : "POPEN",
         "default_remote_workdir"      : "/scratch/project_%(pd.project)s",
From 9812824f6b1d5c8e3fc09b6b09919bdaa87a0871 Mon Sep 17 00:00:00 2001
From: Mikhail Titov
Date: Thu, 31 Aug 2023 18:04:15 -0400
Subject: [PATCH 129/171] fixed test for `Popen._extend_pre_exec`

---
 tests/unit_tests/test_executing/test_popen.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/unit_tests/test_executing/test_popen.py b/tests/unit_tests/test_executing/test_popen.py
index 39c7718ce0..76d84ee7af 100755
--- a/tests/unit_tests/test_executing/test_popen.py
+++ b/tests/unit_tests/test_executing/test_popen.py
@@ -135,7 +135,8 @@ def test_handle_task(self, mocked_sp_popen, mocked_lm_init,
     def test_extend_pre_exec(self, mocked_init):

         pex = Popen(cfg=None, session=None)
-        pex._cfg = {}
+        pex._session = mock.Mock()
+        pex._session.cfg.get.return_value = None

         td    = {'cores_per_rank': 2,
                  'threading_type': '',
@@ -151,7 
+152,9 @@ def test_extend_pre_exec(self, mocked_init): td.update({'threading_type': rpc.OpenMP, 'gpu_type' : rpc.CUDA}) - pex._cfg['task_pre_exec'] = ['export TEST_ENV=test'] + + # we target attribute "task_pre_exec" + pex._session.cfg.get.return_value = ['export TEST_ENV=test'] pex._extend_pre_exec(td, ranks) self.assertIn('export OMP_NUM_THREADS=2', td['pre_exec']) From 7cc44347714f9760fa62b5eddebb984cd63b0ed4 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Thu, 31 Aug 2023 18:59:58 -0400 Subject: [PATCH 130/171] closing session in the "describing tasks" tutorial --- docs/source/tutorials/describing_tasks.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/tutorials/describing_tasks.ipynb b/docs/source/tutorials/describing_tasks.ipynb index 7532303788..50e4eaf7f8 100644 --- a/docs/source/tutorials/describing_tasks.ipynb +++ b/docs/source/tutorials/describing_tasks.ipynb @@ -1079,7 +1079,7 @@ ], "source": [ "report.header('finalize')\n", - "# session.close()" + "session.close()" ] } ], From 2c71d52b5a57e25540890648f2e097ca6d26f62b Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 6 Sep 2023 13:44:38 +0200 Subject: [PATCH 131/171] snapshot --- src/radical/pilot/agent/agent_0.py | 3 +- src/radical/pilot/pilot.py | 30 +++++++----- src/radical/pilot/pilot_manager.py | 12 ++--- src/radical/pilot/task_manager.py | 17 +++---- src/radical/pilot/utils/component.py | 18 +++---- src/radical/pilot/utils/prof_utils.py | 68 ++++++++++++++++----------- 6 files changed, 78 insertions(+), 70 deletions(-) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 8862f620ba..03b9733e3b 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -228,7 +228,8 @@ def initialize(self): rm_info = self._rm.info n_nodes = len(rm_info['node_list']) - pilot = {'type' : 'pilot', + pilot = {'$all' : True, # pass full info to client side + 'type' : 'pilot', 'uid' : self._pid, 'state' : rps.PMGR_ACTIVE, 'resources': {'rm_info': rm_info, diff --git a/src/radical/pilot/pilot.py b/src/radical/pilot/pilot.py index 37d7f93776..6f854128a9 100644 --- a/src/radical/pilot/pilot.py +++ b/src/radical/pilot/pilot.py @@ -194,17 +194,6 @@ def _default_state_cb(self, pilot, state=None): self._log.info("[Callback]: pilot %s state: %s.", uid, state) - if state in rps.FINAL: - # dump json - json = self.as_dict() - # json['_id'] = self.uid - json['type'] = 'pilot' - json['uid'] = self.uid - - tgt = '%s/%s.json' % (self._session.path, self.uid) - ru.write_json(json, tgt) - - if state == rps.FAILED and self._exit_on_error: self._log.error("[Callback]: pilot '%s' failed (exit)", uid) @@ -252,7 +241,14 @@ def _update(self, pilot_dict): self._state = target # keep all information around - self._pilot_dict = copy.deepcopy(pilot_dict) + ru.dict_merge(self._pilot_dict, pilot_dict, ru.OVERWRITE) + + # FIXME MONGODB + resources = self._pilot_dict.get('resources') or {} + rm_info = resources.get('rm_info') + if rm_info: + del self._pilot_dict['resources']['rm_info'] + self._pilot_dict['resource_details'] = rm_info # invoke pilot specific callbacks # FIXME: this iteration needs to be thread-locked! 
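# ------------------------------------------------------------------------------
# illustrative sketch, not part of this patch: the `rm_info` relocation
# performed in `_update` above, isolated into a standalone helper.  The
# assert shows a minimal input/output pair.

def _lift_rm_info(pilot_dict):
    # move 'resources.rm_info' up into a top-level 'resource_details' entry
    rm_info = (pilot_dict.get('resources') or {}).get('rm_info')
    if rm_info:
        del pilot_dict['resources']['rm_info']
        pilot_dict['resource_details'] = rm_info
    return pilot_dict

assert _lift_rm_info({'resources': {'rm_info': {'cores_per_node': 64}}}) \
    == {'resources': {}, 'resource_details': {'cores_per_node': 64}}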
@@ -291,6 +287,7 @@ def as_dict(self): 'stdout' : self.stdout, 'stderr' : self.stderr, 'resource' : self.resource, + 'resources' : self.resources, 'endpoint_fs' : str(self._endpoint_fs), 'resource_sandbox' : str(self._resource_sandbox), 'session_sandbox' : str(self._session_sandbox), @@ -405,6 +402,15 @@ def resource(self): return self._descr.get('resource') + # -------------------------------------------------------------------------- + # + @property + def resources(self): + """str: The amount of resources used by this pilot.""" + + return self._pilot_dict.get('resources') + + # -------------------------------------------------------------------------- # @property diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py index 6cd68b7146..e05de70de4 100644 --- a/src/radical/pilot/pilot_manager.py +++ b/src/radical/pilot/pilot_manager.py @@ -244,15 +244,15 @@ def close(self, terminate=True): # dump json json = self.as_dict() - # json['_id'] = self.uid - json['type'] = 'pmgr' - json['uid'] = self.uid + # json['_id'] = self.uid + json['type'] = 'pmgr' + json['uid'] = self.uid + json['pilots'] = [pilot.as_dict() for pilot in self._pilots.values()] tgt = '%s/%s.json' % (self._session.path, self.uid) ru.write_json(json, tgt) - # -------------------------------------------------------------------------- # def as_dict(self): @@ -310,8 +310,8 @@ def _state_sub_cb(self, topic, msg): if 'type' in thing and thing['type'] == 'pilot': - self._log.debug('state push: %s: %s', thing['uid'], - thing['state']) + self._log.debug('state push: %s: %s %s', thing['uid'], + thing['state'], thing.get('resources')) # we got the state update from the state callback - don't # publish it again diff --git a/src/radical/pilot/task_manager.py b/src/radical/pilot/task_manager.py index 96b80a49f9..7ad8bbf193 100644 --- a/src/radical/pilot/task_manager.py +++ b/src/radical/pilot/task_manager.py @@ -119,7 +119,7 @@ def __init__(self, session, cfg='default', scheduler=None): self._tcb_lock = mt.RLock() self._terminate = mt.Event() self._closed = False - self._task_info = list() + self._task_info = dict() for m in rpc.TMGR_METRICS: self._callbacks[m] = dict() @@ -252,18 +252,14 @@ def close(self): # dump json json = self.as_dict() - # json['_id'] = self.uid - json['type'] = 'tmgr' - json['uid'] = self.uid + # json['_id'] = self.uid + json['type'] = 'tmgr' + json['uid'] = self.uid + json['tasks'] = self._task_info tgt = '%s/%s.json' % (self._session.path, self.uid) ru.write_json(json, tgt) - # dump task json - json = self._task_info - tgt = '%s/tasks.%s.json' % (self._session.path, self.uid) - ru.write_json(json, tgt) - # -------------------------------------------------------------------------- # @@ -411,8 +407,7 @@ def _update_tasks(self, task_dicts): self._tasks[uid]._update(task_dict) to_notify.append([task, s]) - if task_dict['state'] in rps.FINAL: - self._task_info.append(task_dict) + self._task_info[uid] = task_dict if to_notify: if _USE_BULK_CB: diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index 21cd0d535d..f6be02fbfe 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -299,9 +299,6 @@ def create(cfg, session): assert cfg.kind in comp, '%s not in %s (%s)' % (cfg.kind, list(comp.keys()), cfg.uid) - import pprint - session._log.debug('create 1 %s: %s', cfg.kind, - pprint.pformat(cfg.as_dict())) return comp[cfg.kind].create(cfg, session) @@ -815,9 +812,6 @@ def work_cb(self): # TODO: should a poller over all 
inputs, or better yet register # a callback - # import pprint - # pprint.pprint(self._inputs) - for name in self._inputs: qname = self._inputs[name]['qname'] @@ -972,23 +966,23 @@ def advance(self, things, state=None, publish=True, push=False, qname=None, # If '$set' is set, we also publish all keys listed in there. # In all other cases, we only send 'uid', 'type' and 'state'. for thing in things: + + self._log.debug('=== 1 %s %s: %s', thing['uid'], thing['state'], thing.get('resources')) + if '$all' in thing: del thing['$all'] - if '$set' in thing: - del thing['$set'] to_publish.append(thing) + self._log.debug('=== 1 %s %s all', thing['uid'], thing['state']) elif thing['state'] in rps.FINAL: + self._log.debug('=== 1 %s %s final', thing['uid'], thing['state']) to_publish.append(thing) else: tmp = {'uid' : thing['uid'], 'type' : thing['type'], 'state' : thing['state']} - if '$set' in thing: - for key in thing['$set']: - tmp[key] = thing[key] - del thing['$set'] + self._log.debug('=== 1 %s %s tmp', thing['uid'], thing['state']) to_publish.append(tmp) self.publish(rpc.STATE_PUBSUB, {'cmd': 'update', diff --git a/src/radical/pilot/utils/prof_utils.py b/src/radical/pilot/utils/prof_utils.py index 8a47a6dad6..aca87d4621 100644 --- a/src/radical/pilot/utils/prof_utils.py +++ b/src/radical/pilot/utils/prof_utils.py @@ -567,30 +567,35 @@ def get_session_description(sid, src=None): if not src: src = '%s/%s' % (os.getcwd(), sid) - json = ru.read_json('%s/%s.json' % (src, sid)) - - # make sure we have uids - # FIXME v0.47: deprecate - def fix_json(json): - def fix_uids(json): - if isinstance(json, list): - for elem in json: - fix_uids(elem) - elif isinstance(json, dict): - if 'taskmanager' in json and 'tmgr' not in json: - json['tmgr'] = json['taskmanager'] - if 'pilotmanager' in json and 'pmgr' not in json: - json['pmgr'] = json['pilotmanager'] - if '_id' in json and 'uid' not in json: - json['uid'] = json['_id'] - if 'cfg' not in json: - json['cfg'] = dict() - for v in json.values(): - fix_uids(v) - fix_uids(json) - fix_json(json) - - assert sid == json['session'][0]['uid'], 'sid inconsistent' + # construct session json from registry dump, tmgr and pmgr json files, and + # pilot and task json files + + json = dict() + + reg = ru.read_json('%s/%s.reg.json' % (src, sid)) + del reg['rcfgs'] + + json['session'] = [ reg ] + json['tmgr'] = list() + json['pmgr'] = list() + json['pilot'] = list() + json['task'] = list() + + for fname in glob.glob(str('%s/tmgr.*.json' % src)): + json['tmgr'].append(ru.read_json(fname)) + + for fname in glob.glob(str('%s/pmgr.*.json' % src)): + json['pmgr'].append(ru.read_json(fname)) + + for tmgr in json['tmgr']: + json['task'].extend(tmgr['tasks'].values()) + del tmgr['tasks'] + + for pmgr in json['pmgr']: + json['pilot'].extend(pmgr['pilots']) + del pmgr['pilots'] + + json['session'][0]['uid'] = sid ret = dict() ret['entities'] = dict() @@ -608,7 +613,7 @@ def fix_uids(json): tree[sid]['children'].append(uid) tree[uid] = {'uid' : uid, 'etype' : 'pmgr', - 'cfg' : pmgr['cfg'], + 'cfg' : pmgr.get('cfg', {}), 'has' : ['pilot'], 'children' : list() } @@ -618,7 +623,7 @@ def fix_uids(json): tree[sid]['children'].append(uid) tree[uid] = {'uid' : uid, 'etype' : 'tmgr', - 'cfg' : tmgr['cfg'], + 'cfg' : tmgr.get('cfg', {}), 'has' : ['task'], 'children' : list() } @@ -626,10 +631,17 @@ def fix_uids(json): tree[uid]['description'] = dict() for pilot in sorted(json['pilot'], key=lambda k: k['uid']): + pid = pilot['uid'] pmgr = pilot['pmgr'] - details = 
ru.read_json('%s/%s.resources.json' % (src, pid)) - pilot['cfg']['resource_details'] = details + + details = pilot['descriptioa'] + details = ru.dict_merge(details, pilot['resource_details']) + + pilot['cfg'] = details + pilot['cfg']['resource_details'] = details + pilot['cfg']['resource_details']['rm_info'] = details + tree[pmgr]['children'].append(pid) tree[pid] = {'uid' : pid, 'etype' : 'pilot', From 72a715f7c41a0958d80c02ce1e4cd29622c34020 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 6 Sep 2023 14:19:01 +0200 Subject: [PATCH 132/171] typo --- examples/00_getting_started.py | 3 ++- src/radical/pilot/utils/prof_utils.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index 5ccb15cff6..d4b8224d67 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -81,7 +81,8 @@ # create a new task description, and fill it. td = rp.TaskDescription() - td.executable = '/bin/date' + td.executable = '/bin/sleep' + td.arguments = ['1'] td.ranks = 1 td.cores_per_rank = 1 diff --git a/src/radical/pilot/utils/prof_utils.py b/src/radical/pilot/utils/prof_utils.py index aca87d4621..2b8a56edf5 100644 --- a/src/radical/pilot/utils/prof_utils.py +++ b/src/radical/pilot/utils/prof_utils.py @@ -635,7 +635,7 @@ def get_session_description(sid, src=None): pid = pilot['uid'] pmgr = pilot['pmgr'] - details = pilot['descriptioa'] + details = pilot['description'] details = ru.dict_merge(details, pilot['resource_details']) pilot['cfg'] = details From 4c0a274de6eba86c4635463942fa837db68a1fbd Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Wed, 6 Sep 2023 14:35:46 +0200 Subject: [PATCH 133/171] fix staging back --- src/radical/pilot/session.py | 36 +++++++++--------- src/radical/pilot/utils/prof_utils.py | 1 - src/radical/pilot/utils/session.py | 54 +++------------------------ 3 files changed, 23 insertions(+), 68 deletions(-) diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index a841312701..046dd270aa 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -917,8 +917,8 @@ def close(self, **kwargs): tgt = self._cfg.base # # FIXME: MongoDB # self.fetch_json (tgt='%s/%s' % (tgt, self.uid)) - # self.fetch_profiles(tgt=tgt) - # self.fetch_logfiles(tgt=tgt) + self.fetch_profiles(tgt=tgt) + self.fetch_logfiles(tgt=tgt) self._prof.prof("session_fetch_stop", uid=self._uid) @@ -1252,22 +1252,22 @@ def get_resource_config(self, resource, schema=None): # skip_existing=True) # # - # # -------------------------------------------------------------------------- - # # - # def fetch_profiles(self, tgt=None): - # - # return rpu.fetch_profiles(self._uid, tgt=tgt, session=self, - # skip_existing=True) - # - # - # # -------------------------------------------------------------------------- - # # - # def fetch_logfiles(self, tgt=None): - # - # return rpu.fetch_logfiles(self._uid, tgt=tgt, session=self, - # skip_existing=True) - # - # + # -------------------------------------------------------------------------- + # + def fetch_profiles(self, tgt=None): + + return rpu.fetch_profiles(self._uid, tgt=tgt, session=self, + skip_existing=True) + + + # -------------------------------------------------------------------------- + # + def fetch_logfiles(self, tgt=None): + + return rpu.fetch_logfiles(self._uid, tgt=tgt, session=self, + skip_existing=True) + + # -------------------------------------------------------------------------- # def _get_client_sandbox(self): diff --git 
a/src/radical/pilot/utils/prof_utils.py b/src/radical/pilot/utils/prof_utils.py index 2b8a56edf5..e9c4e41511 100644 --- a/src/radical/pilot/utils/prof_utils.py +++ b/src/radical/pilot/utils/prof_utils.py @@ -7,7 +7,6 @@ from .. import states as s from ..task_description import RAPTOR_MASTER, RAPTOR_WORKER, TASK_EXECUTABLE -from .session import fetch_json _debug = os.environ.get('RP_PROF_DEBUG') _node_index = dict() diff --git a/src/radical/pilot/utils/session.py b/src/radical/pilot/utils/session.py index 701dde2d6b..e9dbbe2017 100644 --- a/src/radical/pilot/utils/session.py +++ b/src/radical/pilot/utils/session.py @@ -11,52 +11,6 @@ rs_fs = rs.filesystem -# ------------------------------------------------------------------------------ -# -def fetch_json(sid, tgt=None, skip_existing=False, session=None, log=None): - ''' - Returns: - - file name. - - ''' - - if not log and session: - log = session._log - elif not log: - log = ru.Logger('radical.pilot.utils') - - if session: - rep = session._rep - else: - rep = ru.Reporter('radical.pilot.utils') - - if not tgt: - tgt = os.getcwd() - - if tgt.startswith('/'): - dst = '%s/%s/%s.json' % (tgt, sid, sid) - else: - dst = '%s/%s/%s/%s.json' % (os.getcwd(), tgt, sid, sid) - - ru.rec_makedir(os.path.dirname(dst)) - - if skip_existing and os.path.isfile(dst) and os.path.getsize(dst): - log.info("session already in %s", dst) - return dst - - # FIXME: MongoDB - raise NotImplementedError('MongoDB missing') - - json_docs = ... - ru.write_json(json_docs, dst) - - log.info("session written to %s", dst) - rep.ok("+ %s (json)\n" % sid) - - return dst - - # ------------------------------------------------------------------------------ # def fetch_filetype(ext, name, sid, src=None, tgt=None, access=None, @@ -135,9 +89,11 @@ def fetch_filetype(ext, name, sid, src=None, tgt=None, access=None, rs_file.close() # we need the session json for pilot details - json_name = fetch_json(sid, tgt, skip_existing, session, log) - json_docs = ru.read_json(json_name) - pilots = json_docs['pilot'] + pilots = list() + for fname in glob.glob('%s/pmgr.*.json' % sid): + json_doc = ru.read_json(fname) + pilots.extend(json_doc['pilots']) + num_pilots = len(pilots) log.debug("Session: %s", sid) From cf7c159246a45bbec81163facf6bec984e68bb96 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Thu, 7 Sep 2023 22:45:18 +0200 Subject: [PATCH 134/171] cfg cleanup --- src/radical/pilot/agent/executing/popen.py | 8 ++++---- src/radical/pilot/session.py | 14 ++++++++++++++ src/radical/pilot/utils/component.py | 5 ----- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/radical/pilot/agent/executing/popen.py b/src/radical/pilot/agent/executing/popen.py index 43c67f5c7d..ce0e0fe9c8 100644 --- a/src/radical/pilot/agent/executing/popen.py +++ b/src/radical/pilot/agent/executing/popen.py @@ -64,7 +64,7 @@ def initialize(self): self._watch_queue = queue.Queue() - self._pid = self._reg['cfg.pid'] + self._pid = self.session.cfg.pid # run watcher thread self._watcher = mt.Thread(target=self._watch) @@ -343,7 +343,7 @@ def _handle_task(self, task): # `start_new_session=True` is default, which enables decoupling # from the parent process group (part of the task cancellation) - _start_new_session = self._reg['rcfg.new_session_per_task'] or False + _start_new_session = self.session.rcfg.new_session_per_task or False self._prof.prof('task_run_start', uid=tid) task['proc'] = sp.Popen(args = cmdline, @@ -566,7 +566,7 @@ def _get_rp_env(self, task): ret += 'export RP_SESSION_SANDBOX="%s"\n' % 
self.ssbox ret += 'export RP_PILOT_SANDBOX="%s"\n' % self.psbox ret += 'export RP_TASK_SANDBOX="%s"\n' % sbox - ret += 'export RP_REGISTRY_ADDRESS="%s"\n' % self._session.reg_addr + ret += 'export RP_REGISTRY_ADDRESS="%s"\n' % self.session.reg_addr ret += 'export RP_CORES_PER_RANK=%d\n' % td['cores_per_rank'] ret += 'export RP_GPUS_PER_RANK=%s\n' % gpr @@ -708,7 +708,7 @@ def _extend_pre_exec(self, td, ranks=None): td['pre_exec'].append(rank_env) # pre-defined `pre_exec` per platform configuration - td['pre_exec'].extend(ru.as_list(self.session.cfg.get('task_pre_exec'))) + td['pre_exec'].extend(ru.as_list(self.session.rcfg.get('task_pre_exec'))) # -------------------------------------------------------------------------- diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 046dd270aa..19ee982565 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -1009,6 +1009,20 @@ def cfg(self): return self._cfg + # -------------------------------------------------------------------------- + # + @property + def rcfgs(self): + return self._rcfgs + + + # -------------------------------------------------------------------------- + # + @property + def rcfg(self): + return self._rcfg + + # -------------------------------------------------------------------------- # @property diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index f6be02fbfe..6f6a53c813 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -967,22 +967,17 @@ def advance(self, things, state=None, publish=True, push=False, qname=None, # In all other cases, we only send 'uid', 'type' and 'state'. for thing in things: - self._log.debug('=== 1 %s %s: %s', thing['uid'], thing['state'], thing.get('resources')) - if '$all' in thing: del thing['$all'] to_publish.append(thing) - self._log.debug('=== 1 %s %s all', thing['uid'], thing['state']) elif thing['state'] in rps.FINAL: - self._log.debug('=== 1 %s %s final', thing['uid'], thing['state']) to_publish.append(thing) else: tmp = {'uid' : thing['uid'], 'type' : thing['type'], 'state' : thing['state']} - self._log.debug('=== 1 %s %s tmp', thing['uid'], thing['state']) to_publish.append(tmp) self.publish(rpc.STATE_PUBSUB, {'cmd': 'update', From 924efce0bd33f36d47e02cc75613fa9a6ad67cda Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Fri, 8 Sep 2023 19:00:18 +0200 Subject: [PATCH 135/171] cfg cleanup --- src/radical/pilot/agent/agent_0.py | 50 +++++++++++-------- src/radical/pilot/agent/executing/base.py | 24 ++++----- src/radical/pilot/agent/executing/flux.py | 18 +++---- src/radical/pilot/agent/scheduler/base.py | 22 ++++---- .../pilot/agent/scheduler/continuous.py | 2 +- src/radical/pilot/agent/scheduler/flux.py | 9 ++-- src/radical/pilot/agent/scheduler/hombre.py | 2 +- 7 files changed, 67 insertions(+), 60 deletions(-) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 03b9733e3b..199f80f265 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -48,7 +48,6 @@ def __init__(self): self._pwd = cfg.pilot_sandbox self._session = Session(uid=cfg.sid, cfg=cfg, _role=Session._AGENT_0) - self._rcfg = self._session._rcfg # init the worker / component base classes, connects registry rpu.Worker.__init__(self, cfg, self._session) @@ -90,12 +89,12 @@ def _proxy_input_cb(self, msg): for task in msg: # make sure the tasks obtain env settings (if needed) - if 'task_environment' in self._cfg: + if 
'task_environment' in self.session.rcfg: if not task['description'].get('environment'): task['description']['environment'] = dict() - for k,v in self._cfg['task_environment'].items(): + for k,v in self.session.rcfg.task_environment.items(): # FIXME: this might overwrite user specified env task['description']['environment'][k] = v @@ -139,9 +138,10 @@ def _configure_rm(self): # use for sub-agent startup. Add the remaining ResourceManager # information to the config, for the benefit of the scheduler). - self._cfg.reg_addr = self._session.reg_addr - self._rm = ResourceManager.create(name=self._rcfg.resource_manager, - cfg=self._cfg, rcfg=self._rcfg, + rname = self.session.rcfg.resource_manager + self._rm = ResourceManager.create(name=rname, + cfg=self.session.cfg, + rcfg=self.session.rcfg, log=self._log, prof=self._prof) self._log.debug(pprint.pformat(self._rm.info)) @@ -155,12 +155,12 @@ def _configure_app_comm(self): # channels, merge those into the agent config # # FIXME: this needs to start the app_comm bridges - app_comm = self._rcfg.get('app_comm') + app_comm = self.session.rcfg.get('app_comm') if app_comm: # bridge addresses also need to be exposed to the workload - if 'task_environment' not in self._rcfg: - self._rcfg['task_environment'] = dict() + if 'task_environment' not in self.session.rcfg: + self.session.rcfg['task_environment'] = dict() if isinstance(app_comm, list): app_comm = {ac: {'bulk_size': 0, @@ -175,8 +175,8 @@ def _configure_app_comm(self): AC = ac.upper() - self._rcfg['task_environment']['RP_%s_IN' % AC] = ac['addr_in'] - self._rcfg['task_environment']['RP_%s_OUT' % AC] = ac['addr_out'] + self.session.rcfg.task_environment['RP_%s_IN' % AC] = ac['addr_in'] + self.session.rcfg.task_environment['RP_%s_OUT' % AC] = ac['addr_out'] # -------------------------------------------------------------------------- @@ -317,19 +317,19 @@ def _write_sa_configs(self): # sub-agent config files. # write deep-copies of the config for each sub-agent (sans from agent_0) - for sa in self._rcfg.get('agents', {}): + for sa in self.session.cfg.get('agents', {}): assert (sa != 'agent_0'), 'expect subagent, not agent_0' # use our own config sans agents/components/bridges as a basis for # the sub-agent config. 
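        # illustrative sketch, not part of this patch: the derivation which
        # the lines below implement, as a standalone helper; `session_cfg`
        # stands in for `self.session.cfg`.

        def derive_sa_cfg(session_cfg, sa):
            tmp_cfg = copy.deepcopy(session_cfg)
            tmp_cfg['agents']     = dict()      # layout sections are rebuilt
            tmp_cfg['components'] = dict()
            tmp_cfg['bridges']    = dict()
            ru.dict_merge(tmp_cfg, session_cfg['agents'][sa], ru.OVERWRITE)
            tmp_cfg['uid'] = sa
            return tmp_cfg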
- tmp_cfg = copy.deepcopy(self._session._cfg) + tmp_cfg = copy.deepcopy(self.session.cfg) tmp_cfg['agents'] = dict() tmp_cfg['components'] = dict() tmp_cfg['bridges'] = dict() # merge sub_agent layout into the config - ru.dict_merge(tmp_cfg, self._cfg['agents'][sa], ru.OVERWRITE) + ru.dict_merge(tmp_cfg, self.session.cfg['agents'][sa], ru.OVERWRITE) tmp_cfg['uid'] = sa tmp_cfg['aid'] = sa @@ -399,10 +399,14 @@ def _start_sub_agents(self): # FIXME: reroute to agent daemonizer - if not self._cfg.get('agents'): + if not self.session.cfg.get('agents'): return - assert (len(self._rm.info.agent_node_list) >= len(self._cfg['agents'])) + n_agents = len(self.session.cfg['agents']) + n_agent_nodes = len(self._rm.info.agent_node_list) + + assert n_agent_nodes >= n_agents + self._log.debug('start_sub_agents') @@ -415,10 +419,10 @@ def _start_sub_agents(self): bs_name = '%s/bootstrap_2.sh' - for idx, sa in enumerate(self._cfg['agents']): + for idx, sa in enumerate(self.session.cfg['agents']): - target = self._cfg['agents'][sa]['target'] - bs_args = [self._sid, self._cfg.reg_addr, sa] + target = self.session.cfg['agents'][sa]['target'] + bs_args = [self._sid, self.session.cfg.reg_addr, sa] if target not in ['local', 'node']: @@ -515,11 +519,13 @@ def _start_sub_agents(self): def _check_lifetime(self): # Make sure that we haven't exceeded the runtime - otherwise terminate. - if self._cfg.runtime: + if self.session.cfg.runtime: - if time.time() >= self._starttime + (int(self._cfg.runtime) * 60): + if time.time() >= self._starttime + \ + (int(self.session.cfg.runtime) * 60): - self._log.info('runtime limit (%ss).', self._cfg.runtime * 60) + self._log.info('runtime limit (%ss).', + self.session.cfg.runtime * 60) self._final_cause = 'timeout' self.stop() return False # we are done diff --git a/src/radical/pilot/agent/executing/base.py b/src/radical/pilot/agent/executing/base.py index fa585b8450..9ae466a46f 100644 --- a/src/radical/pilot/agent/executing/base.py +++ b/src/radical/pilot/agent/executing/base.py @@ -50,7 +50,7 @@ def create(cls, cfg, session): if cls != AgentExecutingComponent: raise TypeError('Factory only available to base class!') - name = session._rcfg['agent_spawner'] + name = session.rcfg.agent_spawner from .popen import Popen from .flux import Flux @@ -72,25 +72,25 @@ def create(cls, cfg, session): # def initialize(self): - scfg = ru.Config(cfg=self._reg['cfg']) - rcfg = ru.Config(cfg=self._reg['rcfg']) - rm_name = rcfg['resource_manager'] - self._rm = rpa.ResourceManager.create(rm_name, scfg, rcfg, + rm_name = self.session.rcfg.resource_manager + self._rm = rpa.ResourceManager.create(rm_name, + self.session.cfg, + self.session.rcfg, self._log, self._prof) self._pwd = os.path.realpath(os.getcwd()) - self.sid = self._cfg['sid'] - self.resource = scfg['resource'] - self.rsbox = scfg['resource_sandbox'] - self.ssbox = scfg['session_sandbox'] - self.psbox = scfg['pilot_sandbox'] + self.sid = self.session.uid + self.resource = self.session.cfg.resource + self.rsbox = self.session.cfg.resource_sandbox + self.ssbox = self.session.cfg.session_sandbox + self.psbox = self.session.cfg.pilot_sandbox self.gtod = '$RP_PILOT_SANDBOX/gtod' self.prof = '$RP_PILOT_SANDBOX/prof' # if so configured, let the tasks know what to use as tmp dir - self._task_tmp = rcfg.get('task_tmp', - os.environ.get('TMP', '/tmp')) + self._task_tmp = self.session.rcfg.get('task_tmp', + os.environ.get('TMP', '/tmp')) if self.psbox.startswith(self.ssbox): self.psbox = '$RP_SESSION_SANDBOX%s' % self.psbox[len(self.ssbox):] diff 
--git a/src/radical/pilot/agent/executing/flux.py b/src/radical/pilot/agent/executing/flux.py index 527889526d..56144edbc3 100644 --- a/src/radical/pilot/agent/executing/flux.py +++ b/src/radical/pilot/agent/executing/flux.py @@ -56,11 +56,10 @@ def initialize(self): } # we get an instance of the resource manager (init from registry info) - scfg = ru.Config(cfg=self._reg['cfg']) - rcfg = ru.Config(cfg=self._reg['rcfg']) - - rm_name = rcfg['resource_manager'] - self._rm = ResourceManager.create(rm_name, scfg, rcfg, + rm_name = self.session.rcfg.resource_manager + self._rm = ResourceManager.create(rm_name, + self.session.cfg, + self.session.rcfg, self._log, self._prof) # thread termination signal @@ -120,10 +119,11 @@ def work(self, tasks): # def _listen(self): - lm_cfg = self._reg['rcfg.launch_methods'].get('FLUX') - lm_cfg['pid'] = self._cfg.pid - lm_cfg['reg_addr'] = self._cfg.reg_addr - lm = LaunchMethod.create('FLUX', lm_cfg, self._cfg, + lm_cfg = self.session.rcfg.launch_methods.get('FLUX') + lm_cfg['pid'] = self.session.cfg.pid + lm_cfg['reg_addr'] = self.session.cfg.reg_addr + lm = LaunchMethod.create('FLUX', lm_cfg, + self.session.cfg, self._log, self._prof) flux_handle = None try: diff --git a/src/radical/pilot/agent/scheduler/base.py b/src/radical/pilot/agent/scheduler/base.py index b06dcca390..51c45fbd1c 100644 --- a/src/radical/pilot/agent/scheduler/base.py +++ b/src/radical/pilot/agent/scheduler/base.py @@ -216,14 +216,10 @@ def initialize(self): # The scheduler needs the ResourceManager information which have been # collected during agent startup. - scfg = ru.Config(cfg=self._reg['cfg']) - rcfg = ru.Config(cfg=self._reg['rcfg']) - - # the resource manager needs to connect to the registry - rcfg.reg_addr = self._cfg.reg_addr - - rm_name = rcfg['resource_manager'] - self._rm = ResourceManager.create(rm_name, scfg, rcfg, + rm_name = self.session.rcfg.resource_manager + self._rm = ResourceManager.create(rm_name, + self.session.cfg, + self.session.rcfg, self._log, self._prof) self._partitions = self._rm.get_partitions() # {plabel : [node_ids]} @@ -293,7 +289,7 @@ def create(cls, cfg, session): if cls != AgentSchedulingComponent: raise TypeError("Scheduler Factory only available to base class!") - name = session._reg['rcfg.agent_scheduler'] + name = session.rcfg.agent_scheduler from .continuous_ordered import ContinuousOrdered from .continuous_colo import ContinuousColo @@ -602,8 +598,12 @@ def _schedule_tasks(self): # ZMQ endpoints will not have survived the fork. Specifically the # registry client of the component base class will have to reconnect. - # FIXME: should be moved into a post-fork hook of the base class - self._reg = ru.zmq.RegistryClient(url=self._cfg.reg_addr) + # Note that `self._reg` of the base class is a *pointer* to the sesison + # registry. + # + # FIXME: should be moved into a post-fork hook of the session + # + self._reg = ru.zmq.RegistryClient(url=self.session.cfg.reg_addr) # FIXME: the component does not clean out subscribers after fork :-/ self._subscribers = dict() diff --git a/src/radical/pilot/agent/scheduler/continuous.py b/src/radical/pilot/agent/scheduler/continuous.py index 346d0d9249..75d592d283 100644 --- a/src/radical/pilot/agent/scheduler/continuous.py +++ b/src/radical/pilot/agent/scheduler/continuous.py @@ -96,7 +96,7 @@ def _configure(self): # this option is set. This implementation is not optimized for the # scattered mode! The default is 'False'. 
# - self._scattered = self._cfg.get('scattered', False) + self._scattered = self.session.rcfg.get('scattered', False) # -------------------------------------------------------------------------- diff --git a/src/radical/pilot/agent/scheduler/flux.py b/src/radical/pilot/agent/scheduler/flux.py index 59d6304fe7..fef146d8aa 100644 --- a/src/radical/pilot/agent/scheduler/flux.py +++ b/src/radical/pilot/agent/scheduler/flux.py @@ -72,10 +72,11 @@ def _configure(self): cfg = self._reg['bridges.%s' % qname] self._q = ru.zmq.Putter(qname, cfg['put']) - lm_cfg = self._reg['rcfg.launch_methods'].get('FLUX') - lm_cfg['pid'] = self._cfg.pid - lm_cfg['reg_addr'] = self._cfg.reg_addr - self._lm = LaunchMethod.create('FLUX', lm_cfg, self._cfg, + lm_cfg = self.session.rcfg.launch_methods.get('FLUX') + lm_cfg['pid'] = self.session.cfg.pid + lm_cfg['reg_addr'] = self.session.cfg.reg_addr + self._lm = LaunchMethod.create('FLUX', lm_cfg, + self.session.cfg, self._log, self._prof) diff --git a/src/radical/pilot/agent/scheduler/hombre.py b/src/radical/pilot/agent/scheduler/hombre.py index 37f4394c3e..98749ae225 100644 --- a/src/radical/pilot/agent/scheduler/hombre.py +++ b/src/radical/pilot/agent/scheduler/hombre.py @@ -49,7 +49,7 @@ def _configure(self): # `oversubscribe` is set to False (which is the default for now), # we'll prevent that behavior by allocating one additional CPU core # for each set of requested GPU processes. - self._oversubscribe = self._cfg.get('oversubscribe', True) + self._oversubscribe = self.session.rcfg.get('oversubscribe', True) if not self._oversubscribe: raise ValueError('HOMBRE needs oversubscription enabled') From a1893e43996118872b77098aee5a4ab1885e225e Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Mon, 11 Sep 2023 10:12:02 -0400 Subject: [PATCH 136/171] Upload session when testing notebooks --- .github/workflows/run-rp-notebook.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-rp-notebook.yml b/.github/workflows/run-rp-notebook.yml index 35567a4da9..220afed31d 100644 --- a/.github/workflows/run-rp-notebook.yml +++ b/.github/workflows/run-rp-notebook.yml @@ -53,4 +53,13 @@ jobs: . 
testenv/bin/activate jupyter nbconvert --clear-output --inplace $TARGET_PATH jupyter nbconvert --to notebook --execute --inplace $TARGET_PATH - + SID=$(ls -rt | grep rp.session) + echo "$SID: $SID" + mkdir session + ls -la + cp -R $SID session + - name: upload session + uses: actions/upload-artifact@v3 + with: + name: session + path: session From 5528b19d9bffad1ddc641f1111c557b69cc3ac45 Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Mon, 11 Sep 2023 10:25:00 -0400 Subject: [PATCH 137/171] Call test matrix --- .github/workflows/docs.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 9f90d1b0b9..6b64a9ba7e 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -8,9 +8,13 @@ name: 'Test Jupyter notebooks' on: push: - branches: [ devel ] + branches: + - devel + - devel_nodb_2 pull_request: - branches: [ devel ] + branches: + - devel + - devel_nodb_2 # This allows a subsequently queued workflow run to interrupt previous runs concurrency: From 03e18ed02131606fa2baa217637a44e401a1ceee Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Mon, 11 Sep 2023 11:00:17 -0400 Subject: [PATCH 138/171] tar sessions --- .github/workflows/run-rp-notebook.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run-rp-notebook.yml b/.github/workflows/run-rp-notebook.yml index 220afed31d..0c5217c064 100644 --- a/.github/workflows/run-rp-notebook.yml +++ b/.github/workflows/run-rp-notebook.yml @@ -53,13 +53,15 @@ jobs: . testenv/bin/activate jupyter nbconvert --clear-output --inplace $TARGET_PATH jupyter nbconvert --to notebook --execute --inplace $TARGET_PATH - SID=$(ls -rt | grep rp.session) - echo "$SID: $SID" + SIDCLIENT=$(ls -rt | grep rp.session) + SIDAGENT=~/radical.pilot.sandbox/$SIDCLIENT mkdir session - ls -la - cp -R $SID session + tar cvfj $SIDCLIENT.tar.bz2 $SIDCLENT + tar cvfj $SIDAGENT.tar.bz2 $SIDAGENT + cp -R $SIDCLIENT.tar.bz2 $SIDAGENT.tar.bz2 session - name: upload session uses: actions/upload-artifact@v3 with: name: session path: session + retention-days: 5 From d657e952cdf02eaf42bacc255c5a46328fa6643e Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Mon, 11 Sep 2023 11:05:43 -0400 Subject: [PATCH 139/171] debug --- .github/workflows/run-rp-notebook.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-rp-notebook.yml b/.github/workflows/run-rp-notebook.yml index 0c5217c064..6ddf71be8c 100644 --- a/.github/workflows/run-rp-notebook.yml +++ b/.github/workflows/run-rp-notebook.yml @@ -54,7 +54,10 @@ jobs: jupyter nbconvert --clear-output --inplace $TARGET_PATH jupyter nbconvert --to notebook --execute --inplace $TARGET_PATH SIDCLIENT=$(ls -rt | grep rp.session) - SIDAGENT=~/radical.pilot.sandbox/$SIDCLIENT + SIDAGENT="~/radical.pilot.sandbox/$SIDCLIENT" + ls $SIDCLIENT + ls ~/radical.pilot.sandbox/ + ls $SIDAGENT mkdir session tar cvfj $SIDCLIENT.tar.bz2 $SIDCLENT tar cvfj $SIDAGENT.tar.bz2 $SIDAGENT From e4d9f4449be3cc3f4598ab07a7c56e5a1ab00176 Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Mon, 11 Sep 2023 11:13:22 -0400 Subject: [PATCH 140/171] debug --- .github/workflows/run-rp-notebook.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/run-rp-notebook.yml b/.github/workflows/run-rp-notebook.yml index 6ddf71be8c..f1a014db81 100644 --- a/.github/workflows/run-rp-notebook.yml +++ b/.github/workflows/run-rp-notebook.yml @@ -54,14 +54,14 @@ 
jobs: jupyter nbconvert --clear-output --inplace $TARGET_PATH jupyter nbconvert --to notebook --execute --inplace $TARGET_PATH SIDCLIENT=$(ls -rt | grep rp.session) - SIDAGENT="~/radical.pilot.sandbox/$SIDCLIENT" - ls $SIDCLIENT - ls ~/radical.pilot.sandbox/ - ls $SIDAGENT + # SIDAGENT="~/radical.pilot.sandbox/$SIDCLIENT" + # ls $SIDCLIENT + ls ~/ + # ls $SIDAGENT mkdir session tar cvfj $SIDCLIENT.tar.bz2 $SIDCLENT - tar cvfj $SIDAGENT.tar.bz2 $SIDAGENT - cp -R $SIDCLIENT.tar.bz2 $SIDAGENT.tar.bz2 session + # tar cvfj $SIDAGENT.tar.bz2 $SIDAGENT + cp -R $SIDCLIENT.tar.bz2 session - name: upload session uses: actions/upload-artifact@v3 with: From cbd17dfce9ccc0eb434ac38bbcfc847ae8da033e Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Mon, 11 Sep 2023 11:17:27 -0400 Subject: [PATCH 141/171] debug --- .github/workflows/run-rp-notebook.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/run-rp-notebook.yml b/.github/workflows/run-rp-notebook.yml index f1a014db81..5010df3c02 100644 --- a/.github/workflows/run-rp-notebook.yml +++ b/.github/workflows/run-rp-notebook.yml @@ -55,8 +55,7 @@ jobs: jupyter nbconvert --to notebook --execute --inplace $TARGET_PATH SIDCLIENT=$(ls -rt | grep rp.session) # SIDAGENT="~/radical.pilot.sandbox/$SIDCLIENT" - # ls $SIDCLIENT - ls ~/ + ls $SIDCLIENT # ls $SIDAGENT mkdir session tar cvfj $SIDCLIENT.tar.bz2 $SIDCLENT From b7e4e9a1ac2aeba035f06e87f82a25bca751f2c4 Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Mon, 11 Sep 2023 11:22:01 -0400 Subject: [PATCH 142/171] typo --- .github/workflows/run-rp-notebook.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-rp-notebook.yml b/.github/workflows/run-rp-notebook.yml index 5010df3c02..f244b45708 100644 --- a/.github/workflows/run-rp-notebook.yml +++ b/.github/workflows/run-rp-notebook.yml @@ -55,10 +55,10 @@ jobs: jupyter nbconvert --to notebook --execute --inplace $TARGET_PATH SIDCLIENT=$(ls -rt | grep rp.session) # SIDAGENT="~/radical.pilot.sandbox/$SIDCLIENT" - ls $SIDCLIENT + # ls $SIDCLIENT # ls $SIDAGENT mkdir session - tar cvfj $SIDCLIENT.tar.bz2 $SIDCLENT + tar cvfj $SIDCLIENT.tar.bz2 $SIDCLIENT # tar cvfj $SIDAGENT.tar.bz2 $SIDAGENT cp -R $SIDCLIENT.tar.bz2 session - name: upload session From 861e775f652f05aeeeba348a78c51a9d3b9267e8 Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Mon, 11 Sep 2023 14:19:09 -0400 Subject: [PATCH 143/171] Timeout and separate data collection --- .github/workflows/run-rp-notebook.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-rp-notebook.yml b/.github/workflows/run-rp-notebook.yml index f244b45708..ea92d6c425 100644 --- a/.github/workflows/run-rp-notebook.yml +++ b/.github/workflows/run-rp-notebook.yml @@ -49,14 +49,17 @@ jobs: - name: Run Jupyter Notebook env: TARGET_PATH: ${{ format('{0}/{1}/{2}', inputs.documentation-path, inputs.notebook-path, inputs.notebook-name) }} + timeout-minutes: 5 run: | . 
testenv/bin/activate jupyter nbconvert --clear-output --inplace $TARGET_PATH jupyter nbconvert --to notebook --execute --inplace $TARGET_PATH + - name: Collect session + run: | SIDCLIENT=$(ls -rt | grep rp.session) - # SIDAGENT="~/radical.pilot.sandbox/$SIDCLIENT" + SIDAGENT="$HOME/radical.pilot.sandbox/$SIDCLIENT" # ls $SIDCLIENT - # ls $SIDAGENT + ls $SIDAGENT mkdir session tar cvfj $SIDCLIENT.tar.bz2 $SIDCLIENT # tar cvfj $SIDAGENT.tar.bz2 $SIDAGENT From dc00b0a7fc83bac244d192e4bbd78099e401b147 Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Mon, 11 Sep 2023 14:26:14 -0400 Subject: [PATCH 144/171] Check agent sandbox existence --- .github/workflows/run-rp-notebook.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run-rp-notebook.yml b/.github/workflows/run-rp-notebook.yml index ea92d6c425..a77001595d 100644 --- a/.github/workflows/run-rp-notebook.yml +++ b/.github/workflows/run-rp-notebook.yml @@ -58,12 +58,14 @@ jobs: run: | SIDCLIENT=$(ls -rt | grep rp.session) SIDAGENT="$HOME/radical.pilot.sandbox/$SIDCLIENT" - # ls $SIDCLIENT - ls $SIDAGENT mkdir session tar cvfj $SIDCLIENT.tar.bz2 $SIDCLIENT - # tar cvfj $SIDAGENT.tar.bz2 $SIDAGENT cp -R $SIDCLIENT.tar.bz2 session + if [ -d "$SIDAGENT" ]; then + ls $SIDAGENT + tar cvfj $SIDAGENT.tar.bz2 $SIDAGENT + cp -R $SIDAGENT.tar.bz2 session + fi - name: upload session uses: actions/upload-artifact@v3 with: From 6c2ecbb00b0198cab9d14463f90e1cfffefe5a72 Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Mon, 11 Sep 2023 14:38:26 -0400 Subject: [PATCH 145/171] test timeout --- .github/workflows/run-rp-notebook.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-rp-notebook.yml b/.github/workflows/run-rp-notebook.yml index a77001595d..565111fa2e 100644 --- a/.github/workflows/run-rp-notebook.yml +++ b/.github/workflows/run-rp-notebook.yml @@ -49,7 +49,8 @@ jobs: - name: Run Jupyter Notebook env: TARGET_PATH: ${{ format('{0}/{1}/{2}', inputs.documentation-path, inputs.notebook-path, inputs.notebook-name) }} - timeout-minutes: 5 + timeout-minutes: 1 + continue-on-error: true run: | . testenv/bin/activate jupyter nbconvert --clear-output --inplace $TARGET_PATH From 78c6399570785f03a1c35cb2a2679f849ce05a91 Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Mon, 11 Sep 2023 14:44:50 -0400 Subject: [PATCH 146/171] test production --- .github/workflows/run-rp-notebook.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/run-rp-notebook.yml b/.github/workflows/run-rp-notebook.yml index 565111fa2e..97a1268e6b 100644 --- a/.github/workflows/run-rp-notebook.yml +++ b/.github/workflows/run-rp-notebook.yml @@ -49,7 +49,7 @@ jobs: - name: Run Jupyter Notebook env: TARGET_PATH: ${{ format('{0}/{1}/{2}', inputs.documentation-path, inputs.notebook-path, inputs.notebook-name) }} - timeout-minutes: 1 + timeout-minutes: 10 continue-on-error: true run: | . 
testenv/bin/activate @@ -63,7 +63,6 @@ jobs: tar cvfj $SIDCLIENT.tar.bz2 $SIDCLIENT cp -R $SIDCLIENT.tar.bz2 session if [ -d "$SIDAGENT" ]; then - ls $SIDAGENT tar cvfj $SIDAGENT.tar.bz2 $SIDAGENT cp -R $SIDAGENT.tar.bz2 session fi From e1a373562dc867cbb471aa9f8ce6ae78c216aed1 Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Mon, 11 Sep 2023 15:08:49 -0400 Subject: [PATCH 147/171] Manage naming --- .github/workflows/run-rp-notebook.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/run-rp-notebook.yml b/.github/workflows/run-rp-notebook.yml index 97a1268e6b..4c5dcc51bf 100644 --- a/.github/workflows/run-rp-notebook.yml +++ b/.github/workflows/run-rp-notebook.yml @@ -49,7 +49,7 @@ jobs: - name: Run Jupyter Notebook env: TARGET_PATH: ${{ format('{0}/{1}/{2}', inputs.documentation-path, inputs.notebook-path, inputs.notebook-name) }} - timeout-minutes: 10 + timeout-minutes: 5 continue-on-error: true run: | . testenv/bin/activate @@ -59,12 +59,14 @@ jobs: run: | SIDCLIENT=$(ls -rt | grep rp.session) SIDAGENT="$HOME/radical.pilot.sandbox/$SIDCLIENT" + CLIENTNAME="${{inputs.notebook-name}}_client_$SIDCLIENT" + AGENTNAME="${{inputs.notebook-name}}_agent_$SIDCLIENT" mkdir session - tar cvfj $SIDCLIENT.tar.bz2 $SIDCLIENT - cp -R $SIDCLIENT.tar.bz2 session + tar cvfj $CLIENTNAME.tar.bz2 $SIDCLIENT + cp -R $CLIENTNAME.tar.bz2 session if [ -d "$SIDAGENT" ]; then - tar cvfj $SIDAGENT.tar.bz2 $SIDAGENT - cp -R $SIDAGENT.tar.bz2 session + tar cvfj $AGENTNAME.tar.bz2 $SIDAGENT + cp -R $AGENTNAME.tar.bz2 session fi - name: upload session uses: actions/upload-artifact@v3 From ce9ecba588ce5bfea83915d3ec3ef22dce0532cb Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Mon, 11 Sep 2023 15:20:08 -0400 Subject: [PATCH 148/171] Try keeping failed mark --- .github/workflows/run-rp-notebook.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-rp-notebook.yml b/.github/workflows/run-rp-notebook.yml index 4c5dcc51bf..b8323573e1 100644 --- a/.github/workflows/run-rp-notebook.yml +++ b/.github/workflows/run-rp-notebook.yml @@ -50,12 +50,13 @@ jobs: env: TARGET_PATH: ${{ format('{0}/{1}/{2}', inputs.documentation-path, inputs.notebook-path, inputs.notebook-name) }} timeout-minutes: 5 - continue-on-error: true + # continue-on-error: true run: | . 
testenv/bin/activate jupyter nbconvert --clear-output --inplace $TARGET_PATH jupyter nbconvert --to notebook --execute --inplace $TARGET_PATH - name: Collect session + if: always() run: | SIDCLIENT=$(ls -rt | grep rp.session) SIDAGENT="$HOME/radical.pilot.sandbox/$SIDCLIENT" @@ -69,6 +70,7 @@ jobs: cp -R $AGENTNAME.tar.bz2 session fi - name: upload session + if: always() uses: actions/upload-artifact@v3 with: name: session From edced6d83b9bd2009c59d02e36e9d0e07cf772bf Mon Sep 17 00:00:00 2001 From: Matteo Turilli Date: Mon, 11 Sep 2023 15:34:21 -0400 Subject: [PATCH 149/171] introducte naming convention Use `devel*` to trigger doc tests --- .github/workflows/docs.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 6b64a9ba7e..54bfe44976 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -9,12 +9,10 @@ name: 'Test Jupyter notebooks' on: push: branches: - - devel - - devel_nodb_2 + - 'devel*' pull_request: branches: - - devel - - devel_nodb_2 + - 'devel*' # This allows a subsequently queued workflow run to interrupt previous runs concurrency: From 39e38a51548024ad48bcc9f0969c091f9adf064b Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Mon, 11 Sep 2023 15:48:54 -0400 Subject: [PATCH 150/171] fixed directory to be staged (staging tutorial) --- docs/source/tutorials/staging_data.ipynb | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/tutorials/staging_data.ipynb b/docs/source/tutorials/staging_data.ipynb index 1153e09e10..acb945a00b 100644 --- a/docs/source/tutorials/staging_data.ipynb +++ b/docs/source/tutorials/staging_data.ipynb @@ -192,16 +192,18 @@ "# Staging directives for the pilot.\n", "\n", "import os\n", - "os.makedirs('./input_dir', exist_ok=True)\n", "\n", - "with open('./input_dir/input.txt', 'w') as f:\n", + "input_dir = os.path.join(os.getcwd(), 'input_dir')\n", + "os.makedirs(input_dir, exist_ok=True)\n", + "\n", + "with open(input_dir + '/input.txt', 'w') as f:\n", " f.write('Staged data (task_id=$RP_TASK_ID | pilot_id=$RP_PILOT_ID | session_id=$RP_SESSION_ID)')\n", "\n", "pd = rp.PilotDescription({\n", " 'resource' : 'local.localhost',\n", " 'cores' : 2,\n", " 'runtime' : 15,\n", - " 'input_staging': ['input_dir'],\n", + " 'input_staging': [input_dir],\n", " 'exit_on_error': False\n", "})\n", "\n", From 93255c8b51df66339e8893a768215f05bacbab5d Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Tue, 12 Sep 2023 00:47:39 -0400 Subject: [PATCH 151/171] added description about input staging data --- docs/source/tutorials/staging_data.ipynb | 36 ++++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/docs/source/tutorials/staging_data.ipynb b/docs/source/tutorials/staging_data.ipynb index acb945a00b..6499bbd6f3 100644 --- a/docs/source/tutorials/staging_data.ipynb +++ b/docs/source/tutorials/staging_data.ipynb @@ -141,7 +141,7 @@ "\n", "
\n",
    "__Note:__ In these examples, we will not show a progression bar while waiting for an operation to complete, e.g., for a pilot to stop. That is because the progression bar offered by RP's reporter does not work well within a notebook. You can still use the reporter's progression bar when executing your RP application as a standalone Python script.\n",
    "\n",
    "
" ] @@ -180,7 +180,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For this example, we create a directory `input_dir` within the current working directory, and place a file into this directory. That file will be the input data for every task (this input file is referred in the [radical.pilot.TaskDescription.arguments](../apidoc.rst) attribute). The newly created directory `input_dir` is staged into the `pilot://` location with all its files." + "For this example, create a new directory `input_dir` within the current working directory, and place a file into this directory. That file will be the input data for every task (this input file is referred in the [radical.pilot.TaskDescription.arguments](../apidoc.rst) attribute).\n", + "\n", + "
\n",
    "__Warning:__ Ensure that the directory in which your script creates the staging data is writable. You are also responsible for cleaning up that data after it has been staged.\n",
    "\n",
    "
" ] }, { @@ -189,15 +195,35 @@ "metadata": {}, "outputs": [], "source": [ - "# Staging directives for the pilot.\n", - "\n", "import os\n", "\n", "input_dir = os.path.join(os.getcwd(), 'input_dir')\n", "os.makedirs(input_dir, exist_ok=True)\n", "\n", "with open(input_dir + '/input.txt', 'w') as f:\n", - " f.write('Staged data (task_id=$RP_TASK_ID | pilot_id=$RP_PILOT_ID | session_id=$RP_SESSION_ID)')\n", + " f.write('Staged data (task_id=$RP_TASK_ID | pilot_id=$RP_PILOT_ID | session_id=$RP_SESSION_ID)')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You will stage the newly created directory `input_dir` with all its files into the `pilot://` location.\n", + "\n", + "
\n",
    "__Note:__ If the path provided for `input_staging` is not absolute, RP will resolve it relative to the current working directory. Using absolute paths guarantees that the staged data is located correctly.\n",
    "\n",
    "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Staging directives for the pilot.\n", "\n", "pd = rp.PilotDescription({\n", " 'resource' : 'local.localhost',\n", From 31b1214aa7be6c41cd15324474d144402daf3978 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Tue, 12 Sep 2023 15:36:08 -0400 Subject: [PATCH 152/171] deprecated `python.system_packages` --- .readthedocs.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 855af75dfb..aea6df1966 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -4,7 +4,6 @@ version: 2 formats: [htmlzip] python: - system_packages: true version: 3.7 install: - requirements: requirements-docs.txt From c0497dc661641e2fd3e95aac0381d54b551a96f6 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Tue, 12 Sep 2023 17:45:40 -0400 Subject: [PATCH 153/171] updated "Describing Tasks" tutorial (added check for MPI LM being installed) --- docs/source/tutorials/describing_tasks.ipynb | 699 +++---------------- 1 file changed, 79 insertions(+), 620 deletions(-) diff --git a/docs/source/tutorials/describing_tasks.ipynb b/docs/source/tutorials/describing_tasks.ipynb index 50e4eaf7f8..b46a8adc2a 100644 --- a/docs/source/tutorials/describing_tasks.ipynb +++ b/docs/source/tutorials/describing_tasks.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "67521807", "metadata": {}, @@ -22,32 +21,35 @@ "\n", "
\n", "\n", + "Let's have a quick check that we have MPI launch method installed. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "567c0f26-3e35-44d3-a81a-ab89a79a3dcd", + "metadata": {}, + "outputs": [], + "source": [ + "import radical.utils as ru\n", + "\n", + "mpi_lm_exists = bool(ru.which('mpirun') or ru.which('mpiexec'))" + ] + }, + { + "cell_type": "markdown", + "id": "ec11a436-b9a8-4706-a126-d5f1ba19bd41", + "metadata": {}, + "source": [ "First, some preparatory work for the tutorial. We import some modules and set some variables. Note that we `import radical.pilot as rp` so to abbreviate future API calls. " ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "c8b8387d", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:26:46.316432Z", - "iopub.status.busy": "2023-05-18T01:26:46.316106Z", - "iopub.status.idle": "2023-05-18T01:26:46.451071Z", - "shell.execute_reply": "2023-05-18T01:26:46.450250Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'/home/mturilli/ve-notebooks'" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": {}, + "outputs": [], "source": [ "import os\n", "import sys\n", @@ -57,15 +59,13 @@ "os.environ['RADICAL_REPORT_ANIME'] = 'False'\n", "\n", "import radical.pilot as rp\n", - "import radical.utils as ru\n", "\n", "# determine the path of the currently active virtualenv to simplify some examples below\n", "ve_path = os.path.dirname(os.path.dirname(ru.which('python3')))\n", - "display(ve_path)\n" + "display(ve_path)" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "97ab1560", "metadata": {}, @@ -77,36 +77,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "7e4566d0", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:26:46.455734Z", - "iopub.status.busy": "2023-05-18T01:26:46.455532Z", - "iopub.status.idle": "2023-05-18T01:27:19.693837Z", - "shell.execute_reply": "2023-05-18T01:27:19.692492Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[94m\u001b[1m\n", - "\u001b[39m\u001b[0m\u001b[94m\u001b[1m================================================================================\n", - "\u001b[39m\u001b[0m\u001b[94m\u001b[1m Tutorial: Describing Tasks (RP version 1.34.0) \n", - "\u001b[39m\u001b[0m\u001b[94m\u001b[1m================================================================================\n", - "\u001b[39m\u001b[0m\u001b[94m\u001b[1m\n", - "\u001b[39m\u001b[0m\u001b[94mnew session: \u001b[39m\u001b[0m[rp.session.three.mturilli.019495.0002]\u001b[39m\u001b[0m\u001b[94m \\\n", - "database : \u001b[39m\u001b[0m[mongodb://rct-tutorial:****@95.217.193.116:27017/rct-tutorial]\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[94mcreate pilot manager\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[94mcreate task manager\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[94msubmit 1 pilot(s)\u001b[39m\u001b[0m\n", - " pilot.0000 local.localhost 32 cores 1 gpus\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[94mpilot state: PMGR_ACTIVE\u001b[39m\u001b[0m" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "# configure reporter output \n", "report = ru.Reporter(name='radical.pilot')\n", @@ -131,7 +105,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "1ce411cb", "metadata": {}, @@ -155,34 +128,17 @@ }, { "cell_type": "code", - "execution_count": 3, + 
"execution_count": null, "id": "1ba782cd", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:19.698223Z", - "iopub.status.busy": "2023-05-18T01:27:19.697879Z", - "iopub.status.idle": "2023-05-18T01:27:19.742619Z", - "shell.execute_reply": "2023-05-18T01:27:19.741824Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "submit: \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "# create a minimal executable task\n", "td = rp.TaskDescription({'executable': '/bin/date'})\n", - "task = tmgr.submit_tasks(td)\n" + "task = tmgr.submit_tasks(td)" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "03112275", "metadata": {}, @@ -192,44 +148,15 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "5f2ea29b", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:19.745416Z", - "iopub.status.busy": "2023-05-18T01:27:19.745184Z", - "iopub.status.idle": "2023-05-18T01:27:25.293949Z", - "shell.execute_reply": "2023-05-18T01:27:25.293306Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "wait : 
\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 1\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "data": { - "text/plain": [ - "['DONE']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "metadata": {}, + "outputs": [], "source": [ "tmgr.wait_tasks()" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "9efbbe7a", "metadata": {}, @@ -249,44 +176,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "e7a7d0ac", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:25.296464Z", - "iopub.status.busy": "2023-05-18T01:27:25.296248Z", - "iopub.status.idle": "2023-05-18T01:27:25.315995Z", - "shell.execute_reply": "2023-05-18T01:27:25.315222Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "uid : task.000000\n", - "\u001b[39m\u001b[0mtmgr : tmgr.0000\n", - "\u001b[39m\u001b[0mpilot : pilot.0000\n", - "\u001b[39m\u001b[0mname : \n", - "\u001b[39m\u001b[0mexecutable : /bin/date\n", - "\u001b[39m\u001b[0mstate : DONE\n", - "\u001b[39m\u001b[0mexit_code : 0\n", - "\u001b[39m\u001b[0mstdout : Thu May 18 03:27:23 AM CEST 2023\n", - "\u001b[39m\u001b[0mstderr : \n", - "\u001b[39m\u001b[0mreturn_value : None\n", - "\u001b[39m\u001b[0mexception : None\n", - "\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0mendpoint_fs : file://localhost/\n", - "\u001b[39m\u001b[0mresource_sandbox: file://localhost/home/mturilli/radical.pilot.sandbox\n", - "\u001b[39m\u001b[0msession_sandbox : file://localhost/home/mturilli/radical.pilot.sandbox/rp.session.three.mturilli.019495.0002\n", - "\u001b[39m\u001b[0mpilot_sandbox : file://localhost/home/mturilli/radical.pilot.sandbox/rp.session.three.mturilli.019495.0002/pilot.0000/\n", - "\u001b[39m\u001b[0mtask_sandbox : file://localhost/home/mturilli/radical.pilot.sandbox/rp.session.three.mturilli.019495.0002/pilot.0000/task.000000/\n", - "\u001b[39m\u001b[0mclient_sandbox : /home/mturilli/github/radical.pilot/docs/source/tutorials\n", - "\u001b[39m\u001b[0mmetadata : 
None\n", - "\u001b[39m\u001b[0m" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "report.plain('uid : %s\\n' % task.uid)\n", "report.plain('tmgr : %s\\n' % task.tmgr.uid)\n", @@ -306,11 +199,10 @@ "report.plain('pilot_sandbox : %s\\n' % task.pilot_sandbox)\n", "report.plain('task_sandbox : %s\\n' % task.task_sandbox)\n", "report.plain('client_sandbox : %s\\n' % task.client_sandbox)\n", - "report.plain('metadata : %s\\n' % task.metadata)\n" + "report.plain('metadata : %s\\n' % task.metadata)" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "7273c6ea", "metadata": {}, @@ -328,27 +220,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "32c95d9a", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:25.319462Z", - "iopub.status.busy": "2023-05-18T01:27:25.319172Z", - "iopub.status.idle": "2023-05-18T01:27:25.439540Z", - "shell.execute_reply": "2023-05-18T01:27:25.438841Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "create: \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0msubmit: 
\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "import string\n", "letters = string.ascii_lowercase + string.ascii_uppercase\n", @@ -367,7 +242,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "187dbca6", "metadata": {}, @@ -377,38 +251,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "fa13837b", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:25.441944Z", - "iopub.status.busy": "2023-05-18T01:27:25.441799Z", - "iopub.status.idle": "2023-05-18T01:27:31.296128Z", - "shell.execute_reply": "2023-05-18T01:27:31.295235Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "wait : 
\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 3\n", - "\u001b[39m\u001b[0m\u001b[94m\tFAILED : 49\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "task.000021: ['-u']: Thu May 18 01:27:29 AM UTC 2023\n", - "task.000035: ['-I']: 2023-05-18\n", - "task.000044: ['-R']: Thu, 18 May 2023 03:27:29 +0200\n" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "tmgr.wait_tasks([task.uid for task in tasks])\n", "\n", @@ -418,7 +264,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "0cc12709", "metadata": {}, @@ -428,152 +273,28 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "a3708cb3", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:31.299523Z", - "iopub.status.busy": "2023-05-18T01:27:31.298749Z", - "iopub.status.idle": "2023-05-18T01:27:31.447222Z", - "shell.execute_reply": "2023-05-18T01:27:31.446657Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "wait : 
\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 3\n", - "\u001b[39m\u001b[0m\u001b[94m\tFAILED : 49\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "task.000001: ['-a']: /bin/date: invalid option -- 'a'\n", - "Try '/bin/date --help' for more information.\n", - "task.000002: ['-b']: /bin/date: invalid option -- 'b'\n", - "Try '/bin/date --help' for more information.\n", - "task.000003: ['-c']: /bin/date: invalid option -- 'c'\n", - "Try '/bin/date --help' for more information.\n", - "task.000004: ['-d']: /bin/date: option requires an argument -- 'd'\n", - "Try '/bin/date --help' for more information.\n", - "task.000005: ['-e']: /bin/date: invalid option -- 'e'\n", - "Try '/bin/date --help' for more information.\n", - "task.000006: ['-f']: /bin/date: option requires an argument -- 'f'\n", - "Try '/bin/date --help' for more information.\n", - "task.000007: ['-g']: /bin/date: invalid option -- 'g'\n", - "Try '/bin/date --help' for more information.\n", - "task.000008: ['-h']: /bin/date: invalid option -- 'h'\n", - "Try '/bin/date --help' for more information.\n", - "task.000009: ['-i']: /bin/date: invalid option -- 'i'\n", - "Try '/bin/date --help' for more information.\n", - "task.000010: ['-j']: /bin/date: invalid option -- 'j'\n", - "Try '/bin/date --help' for more information.\n", - "task.000011: ['-k']: /bin/date: invalid option -- 'k'\n", - "Try '/bin/date --help' for more information.\n", - "task.000012: ['-l']: /bin/date: invalid option -- 'l'\n", - "Try '/bin/date --help' for more information.\n", - "task.000013: ['-m']: /bin/date: invalid option -- 'm'\n", - "Try '/bin/date --help' for more information.\n", - "task.000014: ['-n']: /bin/date: invalid option -- 'n'\n", - "Try '/bin/date --help' for more information.\n", - "task.000015: ['-o']: /bin/date: invalid option -- 'o'\n", - "Try '/bin/date --help' for more information.\n", - "task.000016: ['-p']: /bin/date: invalid option -- 'p'\n", - "Try '/bin/date --help' for more information.\n", - "task.000017: ['-q']: 
/bin/date: invalid option -- 'q'\n", - "Try '/bin/date --help' for more information.\n", - "task.000018: ['-r']: /bin/date: option requires an argument -- 'r'\n", - "Try '/bin/date --help' for more information.\n", - "task.000019: ['-s']: /bin/date: option requires an argument -- 's'\n", - "Try '/bin/date --help' for more information.\n", - "task.000020: ['-t']: /bin/date: invalid option -- 't'\n", - "Try '/bin/date --help' for more information.\n", - "task.000022: ['-v']: /bin/date: invalid option -- 'v'\n", - "Try '/bin/date --help' for more information.\n", - "task.000023: ['-w']: /bin/date: invalid option -- 'w'\n", - "Try '/bin/date --help' for more information.\n", - "task.000024: ['-x']: /bin/date: invalid option -- 'x'\n", - "Try '/bin/date --help' for more information.\n", - "task.000025: ['-y']: /bin/date: invalid option -- 'y'\n", - "Try '/bin/date --help' for more information.\n", - "task.000026: ['-z']: /bin/date: invalid option -- 'z'\n", - "Try '/bin/date --help' for more information.\n", - "task.000027: ['-A']: /bin/date: invalid option -- 'A'\n", - "Try '/bin/date --help' for more information.\n", - "task.000028: ['-B']: /bin/date: invalid option -- 'B'\n", - "Try '/bin/date --help' for more information.\n", - "task.000029: ['-C']: /bin/date: invalid option -- 'C'\n", - "Try '/bin/date --help' for more information.\n", - "task.000030: ['-D']: /bin/date: invalid option -- 'D'\n", - "Try '/bin/date --help' for more information.\n", - "task.000031: ['-E']: /bin/date: invalid option -- 'E'\n", - "Try '/bin/date --help' for more information.\n", - "task.000032: ['-F']: /bin/date: invalid option -- 'F'\n", - "Try '/bin/date --help' for more information.\n", - "task.000033: ['-G']: /bin/date: invalid option -- 'G'\n", - "Try '/bin/date --help' for more information.\n", - "task.000034: ['-H']: /bin/date: invalid option -- 'H'\n", - "Try '/bin/date --help' for more information.\n", - "task.000036: ['-J']: /bin/date: invalid option -- 'J'\n", - "Try '/bin/date --help' for more information.\n", - "task.000037: ['-K']: /bin/date: invalid option -- 'K'\n", - "Try '/bin/date --help' for more information.\n", - "task.000038: ['-L']: /bin/date: invalid option -- 'L'\n", - "Try '/bin/date --help' for more information.\n", - "task.000039: ['-M']: /bin/date: invalid option -- 'M'\n", - "Try '/bin/date --help' for more information.\n", - "task.000040: ['-N']: /bin/date: invalid option -- 'N'\n", - "Try '/bin/date --help' for more information.\n", - "task.000041: ['-O']: /bin/date: invalid option -- 'O'\n", - "Try '/bin/date --help' for more information.\n", - "task.000042: ['-P']: /bin/date: invalid option -- 'P'\n", - "Try '/bin/date --help' for more information.\n", - "task.000043: ['-Q']: /bin/date: invalid option -- 'Q'\n", - "Try '/bin/date --help' for more information.\n", - "task.000045: ['-S']: /bin/date: invalid option -- 'S'\n", - "Try '/bin/date --help' for more information.\n", - "task.000046: ['-T']: /bin/date: invalid option -- 'T'\n", - "Try '/bin/date --help' for more information.\n", - "task.000047: ['-U']: /bin/date: invalid option -- 'U'\n", - "Try '/bin/date --help' for more information.\n", - "task.000048: ['-V']: /bin/date: invalid option -- 'V'\n", - "Try '/bin/date --help' for more information.\n", - "task.000049: ['-W']: /bin/date: invalid option -- 'W'\n", - "Try '/bin/date --help' for more information.\n", - "task.000050: ['-X']: /bin/date: invalid option -- 'X'\n", - "Try '/bin/date --help' for more information.\n", - "task.000051: ['-Y']: /bin/date: invalid option 
-- 'Y'\n", - "Try '/bin/date --help' for more information.\n", - "task.000052: ['-Z']: /bin/date: invalid option -- 'Z'\n", - "Try '/bin/date --help' for more information.\n" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "tmgr.wait_tasks([task.uid for task in tasks])\n", "\n", "for task in tasks:\n", " if task.state == rp.FAILED:\n", - " print('%s: %s: %s' % (task.uid, task.description['arguments'], task.stderr.strip()))\n" + " print('%s: %s: %s' % (task.uid, task.description['arguments'], task.stderr.strip()))" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "82299910", "metadata": {}, "source": [ "## MPI Tasks and Task Resources\n", "\n", - "So far, we run single-core tasks. The most common way for application to utilize multiple cores and nodes on HPC machines is to use MPI as communication layer which coordinates multiple application processes, i.e., MPI ranks. In fact, the notion of `ranks` is central to RP's `TaskDescription` class. All MPI ranks will be near-exact copies of each other: they run in the same work directory and the same `environment`, are defined by the same `executable` and `arguments`, get the same amount of resources allocated, etc. Notable exceptions are:\n", + "So far, we run single-core tasks. The most common way for application to utilize multiple cores and nodes on HPC machines is to use MPI as a communication layer, which coordinates multiple application processes, i.e., MPI ranks. In fact, the notion of `ranks` is central to RP's `TaskDescription` class. All MPI ranks will be near-exact copies of each other: they run in the same work directory and the same `environment`, are defined by the same `executable` and `arguments`, get the same amount of resources allocated, etc. Notable exceptions are:\n", "\n", - " - Rank processes may run on different nodes;\n", + " - rank processes may run on different nodes;\n", " - rank processes can communicate via MPI;\n", " - each rank process obtains a unique rank ID.\n", "\n", @@ -601,125 +322,28 @@ "\n", "__Note:__ No core pinning is performed on localhost. Thus, tasks see all CPU cores as available to them. However, the `THREADS` information still reports the correct number of assigned CPU cores.\n", "\n", + "
\n", + "\n", + "
\n",
    "__Note:__ If no MPI launch method is installed, we will proceed with a single rank.\n",
    "\n",
    "
" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "9047b209", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:31.450159Z", - "iopub.status.busy": "2023-05-18T01:27:31.449859Z", - "iopub.status.idle": "2023-05-18T01:27:39.266337Z", - "shell.execute_reply": "2023-05-18T01:27:39.265474Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - ".\u001b[39m\u001b[0m.\u001b[39m\u001b[0m.\u001b[39m\u001b[0m.\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0msubmit: \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0mwait : \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 
4\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--- task.000053:\n", - "0 : PID : 1284029\n", - "0 : NODE : three\n", - "0 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "0 : GPUS : 0\n", - "0 : RANK : 0\n", - "0 : THREADS : 1\n", - "0 : SLEEP : 1\n", - "\n", - "--- task.000054:\n", - "0 : PID : 1284080\n", - "0 : NODE : three\n", - "0 : CPUS : 0000000000000000000000000000000100000000000000000000000000000001\n", - "0 : GPUS : 0\n", - "0 : RANK : 0\n", - "0 : THREADS : 2\n", - "0 : SLEEP : 2\n", - "1 : PID : 1284086\n", - "1 : NODE : three\n", - "1 : CPUS : 0000000000000000000000000000001000000000000000000000000000000010\n", - "1 : GPUS : 0\n", - "1 : RANK : 1\n", - "1 : THREADS : 2\n", - "1 : SLEEP : 2\n", - "\n", - "--- task.000055:\n", - "1 : PID : 1284190\n", - "1 : NODE : three\n", - "1 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "1 : GPUS : 0\n", - "1 : RANK : 1\n", - "1 : THREADS : 3\n", - "1 : SLEEP : 3\n", - "2 : PID : 1284205\n", - "2 : NODE : three\n", - "2 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "2 : GPUS : 0\n", - "2 : RANK : 2\n", - "2 : THREADS : 3\n", - "2 : SLEEP : 3\n", - "0 : PID : 1284167\n", - "0 : NODE : three\n", - "0 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "0 : GPUS : 0\n", - "0 : RANK : 0\n", - "0 : THREADS : 3\n", - "0 : SLEEP : 3\n", - "\n", - "--- task.000056:\n", - "3 : PID : 1284214\n", - "3 : NODE : three\n", - "3 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "3 : GPUS : 0\n", - "3 : RANK : 3\n", - "3 : THREADS : 4\n", - "3 : SLEEP : 4\n", - "0 : PID : 1284157\n", - "0 : NODE : three\n", - "0 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "0 : GPUS : 0\n", - "0 : RANK : 0\n", - "0 : THREADS : 4\n", - "0 : SLEEP : 4\n", - "1 : PID : 1284180\n", - "1 : NODE : three\n", - "1 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "1 : GPUS : 0\n", - "1 : RANK : 1\n", - "1 : THREADS : 4\n", - "1 : SLEEP : 4\n", - "2 : PID : 1284192\n", - "2 : NODE : three\n", - "2 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "2 : GPUS : 0\n", - "2 : RANK : 2\n", - "2 : THREADS : 4\n", - "2 : SLEEP : 4\n", - "\n" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "tds = list()\n", "for n in range(4):\n", + " ranks = (n + 1) if mpi_lm_exists else 1\n", " tds.append(rp.TaskDescription({'executable' : ve_path + '/bin/radical-pilot-hello.sh',\n", " 'arguments' : [n + 1], \n", - " 'ranks' : (n + 1), \n", + " 'ranks' : ranks, \n", " 'cores_per_rank': (n + 1),\n", " 'threading_type': rp.OpenMP}))\n", " report.progress()\n", @@ -730,11 +354,10 @@ "tmgr.wait_tasks([task.uid for task in tasks])\n", "\n", "for task in tasks:\n", - " print('--- %s:\\n%s\\n' % (task.uid, task.stdout.strip()))\n" + " print('--- %s:\\n%s\\n' % (task.uid, task.stdout.strip()))" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "420ed233", "metadata": {}, @@ -753,41 +376,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "0fd464ed", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:39.271133Z", - "iopub.status.busy": "2023-05-18T01:27:39.270540Z", - "iopub.status.idle": "2023-05-18T01:27:45.174660Z", - "shell.execute_reply": "2023-05-18T01:27:45.173670Z" - } - }, - 
"outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "submit: \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0mwait : \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 1\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-rw---- 1 mturilli mturilli 24 May 18 03:27 /tmp/output.test.dat\n", - "-rw-rw---- 1 mturilli mturilli 0 May 18 03:27 /tmp/output.test.err\n", - "-rw-rw---- 1 mturilli mturilli 0 May 18 03:27 /tmp/output.test.out\n", - "\n", - " 61 104 3465\n", - "\n" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "\n", "td 
= rp.TaskDescription({'executable' : '/bin/sh',\n", @@ -808,7 +400,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "a4bb97c2", "metadata": {}, @@ -823,7 +414,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "200d8813", "metadata": {}, @@ -851,55 +441,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "059fa07e", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:45.178679Z", - "iopub.status.busy": "2023-05-18T01:27:45.177969Z", - "iopub.status.idle": "2023-05-18T01:27:49.365187Z", - "shell.execute_reply": "2023-05-18T01:27:49.364347Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "submit: \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0mwait : 
\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 1\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[... CONTENT SHORTENED ...]\n", - "EL_ADDR=144.76.72.175:27017\n", - "RP_BOOTSTRAP_0_REDIR=True\n", - "RP_GTOD=/home/mturilli/radical.pilot.sandbox/rp.session.three.mturilli.019495.0002//pilot.0000//gtod\n", - "RP_PILOT_ID=pilot.0000\n", - "RP_PILOT_SANDBOX=/home/mturilli/radical.pilot.sandbox/rp.session.three.mturilli.019495.0002//pilot.0000/\n", - "RP_PROF=/home/mturilli/radical.pilot.sandbox/rp.session.three.mturilli.019495.0002//pilot.0000//prof\n", - "RP_PROF_TGT=/home/mturilli/radical.pilot.sandbox/rp.session.three.mturilli.019495.0002//pilot.0000//task.000058/task.000058.prof\n", - "RP_RANK=0\n", - "RP_RANKS=1\n", - "RP_RESOURCE=local.localhost\n", - "RP_RESOURCE_SANDBOX=/home/mturilli/radical.pilot.sandbox\n", - "RP_SESSION_ID=rp.session.three.mturilli.019495.0002\n", - "RP_SESSION_SANDBOX=/home/mturilli/radical.pilot.sandbox/rp.session.three.mturilli.019495.0002/\n", - "RP_TASK_ID=task.000058\n", - "RP_TASK_NAME=task.000058\n", - "RP_TASK_SANDBOX=/home/mturilli/radical.pilot.sandbox/rp.session.three.mturilli.019495.0002//pilot.0000//task.000058\n", - "RP_VENV_PATH=/home/mturilli/radical.pilot.sandbox/ve.local.localhost.1.34.0\n", - "RP_VENV_TYPE=venv\n", - "\n" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "td = rp.TaskDescription({'executable' : '/bin/sh',\n", " 'arguments' : ['-c', 'printf \"FOO=$FOO\\nBAR=$BAR\\nSHELL=$SHELL\\n\"; env | grep RP_ | sort'],\n", @@ -911,7 +456,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "70d849d8", "metadata": {}, @@ -936,48 +480,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "15728941", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:49.368303Z", - "iopub.status.busy": "2023-05-18T01:27:49.367565Z", - "iopub.status.idle": "2023-05-18T01:27:54.344539Z", - "shell.execute_reply": "2023-05-18T01:27:54.343996Z" - } - }, - 
"outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "submit: \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0mwait : \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 1\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: pyyaml in /home/mturilli/ve-notebooks/lib/python3.10/site-packages (6.0)\n", - "/home/mturilli/ve-notebooks/bin/python3\n", - "Name: PyYAML\n", - "Version: 6.0\n", - "Summary: YAML parser and emitter for Python\n", - "Home-page: https://pyyaml.org/\n", - "Author: Kirill Simonov\n", - 
"Author-email: xi@resolvent.net\n", - "License: MIT\n", - "Location: /home/mturilli/ve-notebooks/lib/python3.10/site-packages\n", - "Requires: \n", - "Required-by: jupyter-events, jupyter-nbextensions-configurator, myst-parser\n", - "\n" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "td = rp.TaskDescription({'pre_exec' : ['. %s/bin/activate' % ve_path, \n", " 'pip install pyyaml'],\n", @@ -986,11 +492,10 @@ " })\n", "task = tmgr.submit_tasks(td)\n", "tmgr.wait_tasks([task.uid])\n", - "print(task.stdout)\n" + "print(task.stdout)" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "78f3f8a7", "metadata": {}, @@ -1002,38 +507,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "41467fc2", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:54.346733Z", - "iopub.status.busy": "2023-05-18T01:27:54.346503Z", - "iopub.status.idle": "2023-05-18T01:28:25.421677Z", - "shell.execute_reply": "2023-05-18T01:28:25.420783Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "submit: \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0mwait : 
\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 1\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/mturilli/radical.pilot.sandbox/rp.session.three.mturilli.019495.0002/pilot.0000/env/rp_named_env.test_env/bin/python3\n", - "psutil 5.9.5\n", - "\n" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "\n", "pilot.prepare_env(env_name='test_env', \n", @@ -1051,32 +528,14 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "9c914fc2", "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:28:25.424512Z", - "iopub.status.busy": "2023-05-18T01:28:25.424291Z", - "iopub.status.idle": "2023-05-18T01:28:25.429186Z", - "shell.execute_reply": "2023-05-18T01:28:25.428627Z" - }, "pycharm": { "name": "#%%\n" } }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[93m\u001b[1m\n", - "\u001b[39m\u001b[0m\u001b[93m\u001b[1m--------------------------------------------------------------------------------\n", - "\u001b[39m\u001b[0m\u001b[93m\u001b[1mfinalize \n", - "\u001b[39m\u001b[0m\u001b[93m\u001b[1m\n", - "\u001b[39m\u001b[0m" - ] - } - ], + "outputs": [], "source": [ "report.header('finalize')\n", "session.close()" @@ -1085,7 +544,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1099,7 +558,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.9.13" }, "varInspector": { "cols": { From d6844e28630171f3bf65c3ea3d567860c189034a Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Tue, 12 Sep 2023 17:54:38 -0400 Subject: [PATCH 154/171] added package `mpich` into dependencies for CI Notebooks --- .github/workflows/run-rp-notebook.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run-rp-notebook.yml b/.github/workflows/run-rp-notebook.yml index b8323573e1..57de1dab78 100644 --- 
a/.github/workflows/run-rp-notebook.yml +++ b/.github/workflows/run-rp-notebook.yml @@ -41,6 +41,7 @@ jobs: python-version: ${{ inputs.python-version }} - name: Install dependencies run: | + sudo apt update -y && sudo apt install -y mpich python -m venv testenv . testenv/bin/activate python -m pip install --upgrade pip setuptools wheel From 7e4f0b9563d1cc16305908eaef1a6e0d7c29a0b7 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Tue, 12 Sep 2023 17:55:37 -0400 Subject: [PATCH 155/171] added package `mpich` into dependencies for ReadTheDocs config --- .readthedocs.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 855af75dfb..ccd4d4705c 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -3,9 +3,15 @@ version: 2 formats: [htmlzip] +build: + os: "ubuntu-22.04" + tools: + python: "3.7" + apt_packages: + - mpich + python: system_packages: true - version: 3.7 install: - requirements: requirements-docs.txt - method: pip From 899bc9bd77755f82efd3300258d82f20b8c9412f Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Wed, 13 Sep 2023 14:00:46 -0400 Subject: [PATCH 156/171] fixed test for `Agent_0` --- tests/unit_tests/test_agent_0/test_agent_0.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit_tests/test_agent_0/test_agent_0.py b/tests/unit_tests/test_agent_0/test_agent_0.py index 19cb273b71..02fd8fba6a 100755 --- a/tests/unit_tests/test_agent_0/test_agent_0.py +++ b/tests/unit_tests/test_agent_0/test_agent_0.py @@ -149,7 +149,9 @@ def test_start_sub_agents(self, mocked_run_sh_callout, mocked_ru_env_prep, agent_0._pwd = tempfile.gettempdir() agent_0._log = mock.Mock() agent_0._sid = 'rp.session.0' - agent_0._cfg = ru.Config(from_dict={ + + agent_0._session = mock.Mock() + agent_0._session.cfg = ru.Config(from_dict={ 'agents': { 'agent_1': {'target' : 'node', 'components': {'agent_executing': {'count': 1}}} @@ -199,7 +201,7 @@ def check_agent_task(agent_task, *args, **kwargs): os.unlink(agent_file) # incorrect config setup for agent ('target' is in ['local', 'node']) - agent_0._cfg['agents']['agent_1']['target'] = 'incorrect_target' + agent_0._session.cfg['agents']['agent_1']['target'] = 'incorrect_target' with self.assertRaises(ValueError): agent_0._start_sub_agents() From 739efbbacc433d68a42c0568204fff3ecac7e4c9 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Wed, 13 Sep 2023 15:33:27 -0400 Subject: [PATCH 157/171] fixed tests for `AgentExecutingComponent` --- tests/unit_tests/test_executing/test_base.py | 25 +++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/tests/unit_tests/test_executing/test_base.py b/tests/unit_tests/test_executing/test_base.py index 14c92817c6..0f8df63e71 100755 --- a/tests/unit_tests/test_executing/test_base.py +++ b/tests/unit_tests/test_executing/test_base.py @@ -43,7 +43,7 @@ def work(self, tasks): for spawner in spawners: session = ru.Config(cfg={ - '_rcfg': { 'agent_spawner' : spawner}}) + 'rcfg': {'agent_spawner' : spawner}}) try: AgentExecutingComponent.create(cfg=spawner, session=session) except: @@ -62,22 +62,19 @@ def work(self, tasks): def test_initialize(self, mocked_rm, mocked_init): ec = AgentExecutingComponent(cfg=None, session=None) - ec._cfg = ru.TypedDict(from_dict={ - 'sid' : 'sid.0000', + + ec._session = mock.Mock() + ec._session.uid = 'sid.0000' + ec._session.cfg = ru.TypedDict(from_dict={ + 'resource' : 'resource_config_label', 'resource_sandbox': '', 'session_sandbox' : '', - 'pilot_sandbox' : '', - 
'resource' : 'resource_config_label', - 'resource_cfg' : {'order': [], - 'launch_methods': {'SRUN': {}}} + 'pilot_sandbox' : '' }) - ec._reg = ru.Config(cfg={ - 'cfg' : {'resource' : 'localhost', - 'pilot_sandbox' : '', - 'session_sandbox' : '', - 'resource_sandbox': ''}, - 'rcfg': {'resource_manager': 'FORK', - 'agent_spawner' : 'POPEN'}}) + ec._session.rcfg = ru.TypedDict(from_dict={ + 'resource_manager': 'FORK', + 'agent_spawner' : 'POPEN'}) + ec._log = ec._prof = mock.Mock() ec.work = ec.control_cb = mock.Mock() ec.register_input = ec.register_output = mock.Mock() From 4aa0fb6718b7ed1c143a3d8869320ed63c92abd9 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Wed, 13 Sep 2023 15:44:08 -0400 Subject: [PATCH 158/171] fixed tests for Executor `Popen` --- tests/unit_tests/test_executing/test_popen.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/unit_tests/test_executing/test_popen.py b/tests/unit_tests/test_executing/test_popen.py index 76d84ee7af..b18d59d0af 100755 --- a/tests/unit_tests/test_executing/test_popen.py +++ b/tests/unit_tests/test_executing/test_popen.py @@ -81,8 +81,6 @@ def test_handle_task(self, mocked_sp_popen, mocked_lm_init, pex._log = pex._prof = pex._watch_queue = mock.Mock() pex._log._debug_level = 1 - pex._reg = ru.Config(from_dict={'rcfg.new_session_per_task': False}) - pex._cfg = dict() pex._pwd = '' pex._pid = 'pilot.0000' pex.sid = 'session.0000' @@ -92,7 +90,9 @@ def test_handle_task(self, mocked_sp_popen, mocked_lm_init, pex.psbox = '' pex.gtod = '' pex.prof = '' - pex._session = ru.Config(cfg={'cfg': {'resource_cfg': {}}}) + + pex._session = mock.Mock() + pex._session.rcfg = ru.Config(from_dict={'new_session_per_task': False}) pex._rm = mock.Mock() pex._rm.find_launcher = mocked_find_launcher @@ -135,8 +135,9 @@ def test_handle_task(self, mocked_sp_popen, mocked_lm_init, def test_extend_pre_exec(self, mocked_init): pex = Popen(cfg=None, session=None) - pex._session = mock.Mock() - pex._session.cfg.get.return_value = None + + pex._session = mock.Mock() + pex._session.rcfg = {} td = {'cores_per_rank': 2, 'threading_type': '', @@ -154,7 +155,7 @@ def test_extend_pre_exec(self, mocked_init): 'gpu_type' : rpc.CUDA}) # we target attribute "task_pre_exec" - pex._session.cfg.get.return_value = ['export TEST_ENV=test'] + pex._session.rcfg = {'task_pre_exec': ['export TEST_ENV=test']} pex._extend_pre_exec(td, ranks) self.assertIn('export OMP_NUM_THREADS=2', td['pre_exec']) From 944a47fb7341c7c64f53186bd4c3c12b9781b04f Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Wed, 13 Sep 2023 15:59:45 -0400 Subject: [PATCH 159/171] fixed tests for Base Scheduling --- tests/unit_tests/test_scheduler/test_base.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/unit_tests/test_scheduler/test_base.py b/tests/unit_tests/test_scheduler/test_base.py index 4eff012631..be0f4d7ada 100755 --- a/tests/unit_tests/test_scheduler/test_base.py +++ b/tests/unit_tests/test_scheduler/test_base.py @@ -54,6 +54,8 @@ def test_initialize(self, mocked_env_eval, mocked_hostname, mocked_mp, sched.nodes = [] sched._partitions = {} + sched._session = mock.Mock() + for c in self._test_cases['initialize']: def _mock_get(_c, name): @@ -62,9 +64,10 @@ def _mock_get(_c, name): from functools import partial mock_get = partial(_mock_get, c) - sched._cfg = ru.Config(from_dict={'reg_addr': 'addr'}) - sched._reg = ru.Config(from_dict={'cfg': c['config'], - 'rcfg': c['config']['resource_cfg']}) + sched._session.cfg = ru.Config( + 
from_dict=c['config']) + sched._session.rcfg = ru.Config( + from_dict=c['config']['resource_cfg']) with mock.patch.object(ru.zmq.RegistryClient, 'get', mock_get): if 'RuntimeError' in c['result']: From 03e464f16ce9f85aa8440df3165d5f565b417625 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Wed, 13 Sep 2023 16:03:53 -0400 Subject: [PATCH 160/171] enable tests for all devel-branches --- .github/workflows/ci.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8ade818cf6..03b65a3b86 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,9 +3,11 @@ name: CI on: push: - branches: [ devel ] + branches: + - 'devel*' pull_request: - branches: [ devel ] + branches: + - 'devel*' jobs: From b0ac106bfa95bb1998b091ecb172e8cb4191e5c6 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sat, 16 Sep 2023 13:05:33 +0200 Subject: [PATCH 161/171] snap --- examples/00_getting_started.py | 1 + src/radical/pilot/agent/agent_0.py | 2 + src/radical/pilot/messages.py | 12 +++--- src/radical/pilot/pilot.py | 59 ++++++++++++++++++++---------- 4 files changed, 48 insertions(+), 26 deletions(-) diff --git a/examples/00_getting_started.py b/examples/00_getting_started.py index d4b8224d67..4a0595d457 100755 --- a/examples/00_getting_started.py +++ b/examples/00_getting_started.py @@ -25,6 +25,7 @@ report.title('Getting Started (RP version %s)' % rp.version) # use the resource specified as argument, fall back to localhost + resource = None if len(sys.argv) > 2: report.exit('Usage:\t%s [resource]\n\n' % sys.argv[0]) elif len(sys.argv) == 2: resource = sys.argv[1] else : resource = 'local.localhost' diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 199f80f265..5861835343 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -573,6 +573,8 @@ def _control_cb(self, _, msg): # def _ctrl_prepare_env(self, msg): + self._log.debug('=== control prep env') + arg = msg['arg'] for env_id in arg: diff --git a/src/radical/pilot/messages.py b/src/radical/pilot/messages.py index 4bf8cbab02..2cd141c55b 100644 --- a/src/radical/pilot/messages.py +++ b/src/radical/pilot/messages.py @@ -28,8 +28,8 @@ class HeartbeatMessage(RPBaseMessage): _schema = {'uid' : str} _defaults = {'_msg_type': 'heartbeat', - 'uid' : None, - 'fwd' : False} + 'fwd' : False, + 'uid' : None} ru.Message.register_msg_type('heartbeat', HeartbeatMessage) @@ -46,12 +46,12 @@ class RPCRequestMessage(RPBaseMessage): 'kwargs' : dict} # rpc command named arguments _defaults = { '_msg_type': 'rpc_req', + 'fwd' : True, 'uid' : None, 'addr' : None, 'cmd' : None, 'args' : [], - 'kwargs' : {}, - 'fwd' : True} + 'kwargs' : {}} @@ -68,12 +68,12 @@ class RPCResultMessage(RPBaseMessage): 'err' : str, # stderr 'exc' : str} # raised exception representation _defaults = {'_msg_type': 'rpc_res', + 'fwd' : True, 'uid' : None, 'val' : None, 'out' : None, 'err' : None, - 'exc' : None, - 'fwd' : True} + 'exc' : None} # -------------------------------------------------------------------------- # diff --git a/src/radical/pilot/pilot.py b/src/radical/pilot/pilot.py index 6f854128a9..21ea407125 100644 --- a/src/radical/pilot/pilot.py +++ b/src/radical/pilot/pilot.py @@ -7,12 +7,15 @@ import time import queue +import threading as mt + import radical.utils as ru from . import PilotManager from . import states as rps from . 
import constants as rpc +from .messages import RPCRequestMessage, RPCResultMessage from .staging_directives import complete_url @@ -157,7 +160,7 @@ def __init__(self, pmgr: PilotManager, descr): self._pilot_sandbox .path = self._pilot_sandbox .path % expand # hook into the control pubsub for rpc handling - self._rpc_queue = queue.Queue() + self._rpc_reqs = dict() self._ctrl_addr_sub = self._session._reg['bridges.control_pubsub.addr_sub'] self._ctrl_addr_pub = self._session._reg['bridges.control_pubsub.addr_pub'] @@ -735,15 +738,20 @@ def stage_in(self, sds): # -------------------------------------------------------------------------- # - def _control_cb(self, topic, msg): + def _control_cb(self, topic, msg_data): + + # we only listen for RPCResponse messages - cmd = msg['cmd'] - arg = msg['arg'] + try: + msg = ru.zmq.Message.deserialize(msg_data) + self._log.debug('=== rpc res: %s', msg) - if cmd == 'rpc_res': + if msg.uid in self._rpc_reqs: + self._rpc_reqs[msg.uid]['res'] = msg + self._rpc_reqs[msg.uid]['evt'].set() - self._log.debug('rpc res: %s', arg) - self._rpc_queue.put(arg) + except: + self._log.debug('=== ignore msg %s', msg_data) # -------------------------------------------------------------------------- @@ -756,26 +764,37 @@ def rpc(self, cmd, args=None): thread safe to have multiple concurrent RPC calls. ''' + self._log.debug('=== pilot in %s state', self.state) + self.wait(rps.PMGR_ACTIVE) + self._log.debug('=== pilot now in %s state', self.state) + if not args: args = dict() - rpc_id = ru.generate_id('rpc') - rpc_req = {'uid' : rpc_id, - 'rpc' : cmd, - 'tgt' : self._uid, - 'arg' : args} + rpc_id = ru.generate_id('%s.rpc' % self._uid) + rpc_req = RPCRequestMessage(uid=rpc_id, cmd=cmd, args=args) + + self._rpc_reqs[rpc_id] = { + 'req': rpc_req, + 'res': None, + 'evt': mt.Event(), + 'time': time.time(), + } + + self._log.debug('=== wait for rpc request %s', rpc_req) + while True: + if not self._rpc_reqs[rpc_id]['evt'].wait(timeout=10): + self._log.debug('=== still waiting for rpc request %s', rpc_id) + continue - self._ctrl_pub.put(rpc.CONTROL_PUBSUB, {'cmd': 'rpc_req', - 'arg': rpc_req, - 'fwd': True}) - rpc_res = self._rpc_queue.get() - self._log.debug('rpc result: %s', rpc_res['ret']) + rpc_res = self._rpc_reqs[rpc_id]['res'] + self._log.debug('=== rpc result: %s', rpc_res) - if rpc_res['ret']: - raise RuntimeError('rpc failed: %s' % rpc_res['err']) + if rpc_res.exc: + raise RuntimeError('=== rpc failed: %s' % rpc_res.exc) - return rpc_res['ret'] + return rpc_res.val # -------------------------------------------------------------------------- From ec3f84a590fd231a298e08accd89ee8e5749b1a8 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sat, 16 Sep 2023 13:09:31 +0200 Subject: [PATCH 162/171] snap --- examples/12_task_env.py | 2 + src/radical/pilot/agent/agent_0.py | 91 ++---------- src/radical/pilot/agent/executing/base.py | 2 +- src/radical/pilot/agent/executing/sleep.py | 1 + src/radical/pilot/agent/scheduler/base.py | 6 +- src/radical/pilot/pilot.py | 16 +-- src/radical/pilot/pilot_manager.py | 7 +- src/radical/pilot/pmgr/launching/base.py | 5 +- src/radical/pilot/raptor/master.py | 4 +- src/radical/pilot/task_manager.py | 12 +- src/radical/pilot/tmgr/scheduler/base.py | 12 +- .../pilot/tmgr/staging_input/default.py | 9 +- src/radical/pilot/utils/component.py | 135 +++++++++++++++--- src/radical/pilot/utils/component_manager.py | 3 + src/radical/pilot/utils/rpc_helper.py | 48 +------ 15 files changed, 165 insertions(+), 188 deletions(-) diff --git 
a/examples/12_task_env.py b/examples/12_task_env.py index 4d3b4fb499..8f92787d50 100755 --- a/examples/12_task_env.py +++ b/examples/12_task_env.py @@ -69,8 +69,10 @@ # Launch the pilot. pilot = pmgr.submit_pilots(pdesc) + report.header('prepare task env') pilot.prepare_env('numpy_env', {'type' : 'virtualenv', 'setup': ['numpy']}) + report.ok('ok') report.header('submit tasks') diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 5861835343..fe4ae4ac39 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -82,7 +82,7 @@ def __init__(self): # def _proxy_input_cb(self, msg): - self._log.debug('proxy input cb: %s', len(msg)) + self._log.debug('====== proxy input cb: %s', len(msg)) to_advance = list() @@ -183,9 +183,6 @@ def _configure_app_comm(self): # def initialize(self): - # handle pilot commands - self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb) - # listen for new tasks from the client self.register_input(rps.AGENT_STAGING_INPUT_PENDING, rpc.PROXY_TASK_QUEUE, @@ -205,9 +202,16 @@ def initialize(self): self.register_output(rps.TMGR_STAGING_OUTPUT_PENDING, rpc.PROXY_TASK_QUEUE) - # subscribe for control messages # FIXME: to be removed (duplication) - # ru.zmq.Subscriber(channel=rpc.CONTROL_PUBSUB, cb=self._control_cb, - # url=self._reg['bridges.%s.addr_sub' % rpc.CONTROL_PUBSUB]) + # hook into the control pubsub for rpc handling + ctrl_addr_pub = self._session._reg['bridges.control_pubsub.addr_pub'] + ctrl_addr_sub = self._session._reg['bridges.control_pubsub.addr_sub'] + + self._rpc_helper = rpu.RPCHelper(owner=self._uid, + ctrl_addr_pub=ctrl_addr_pub, + ctrl_addr_sub=ctrl_addr_sub, + log=self._log, prof=self._prof) + + self._rpc_helper.add_handler('prepare_env', self._prepare_env) # before we run any tasks, prepare a named_env `rp` for tasks which use # the pilot's own environment, such as raptors @@ -218,7 +222,7 @@ def initialize(self): 'export PATH=%s' % os.environ.get('PATH', '')] } - self._prepare_env('rp', env_spec) + self._rpc_helper.request('prepare_env', env_name='rp', env_spec=env_spec) # start any services if they are requested self._start_services() @@ -535,13 +539,13 @@ def _check_lifetime(self): # -------------------------------------------------------------------------- # - def _control_cb(self, _, msg): + def control_cb(self, topic, msg): ''' Check for commands on the control pubsub, mainly waiting for RPC requests to handle. 
''' - self._log.debug('==== control: %s', msg) + self._log.debug('==== %s: %s', topic, msg) cmd = msg['cmd'] arg = msg['arg'] @@ -554,35 +558,15 @@ def _control_cb(self, _, msg): self._session._hb.beat(uid=self._pmgr) return True - elif cmd == 'prep_env': - return self._ctrl_prepare_env(msg) - elif cmd == 'cancel_pilots': return self._ctrl_cancel_pilots(msg) - elif cmd == 'rpc_req': - return self._ctrl_rpc_req(msg) - elif cmd == 'service_up': return self._ctrl_service_up(msg) return True - # -------------------------------------------------------------------------- - # - def _ctrl_prepare_env(self, msg): - - self._log.debug('=== control prep env') - - arg = msg['arg'] - - for env_id in arg: - self._prepare_env(env_id, arg[env_id]) - - return True - - # -------------------------------------------------------------------------- # def _ctrl_cancel_pilots(self, msg): @@ -602,51 +586,6 @@ def _ctrl_cancel_pilots(self, msg): return False - # -------------------------------------------------------------------------- - # - def _ctrl_rpc_req(self, msg): - - cmd = msg['cmd'] - arg = msg['arg'] - req = arg['rpc'] - - if req not in ['hello', 'prepare_env']: - # we don't handle that request - return True - - rpc_res = {'uid': arg['uid']} - - try: - if req == 'hello' : - out = 'hello %s' % ' '.join(arg['arg']) - - elif req == 'prepare_env': - env_name = arg['arg']['env_name'] - env_spec = arg['arg']['env_spec'] - out = self._prepare_env(env_name, env_spec) - - else: - # unknown command - self._log.info('ignore rpc command: %s', req) - return True - - # request succeeded - respond with return value - rpc_res['err'] = None - rpc_res['out'] = out - rpc_res['ret'] = 0 - - except Exception as e: - # request failed for some reason - indicate error - rpc_res['err'] = repr(e) - rpc_res['out'] = None - rpc_res['ret'] = 1 - self._log.exception('control cmd failed') - - # publish the response (success or failure) - self.publish(rpc.CONTROL_PUBSUB, {'cmd': 'rpc_res', - 'arg': rpc_res}) - - # -------------------------------------------------------------------------- # def _ctrl_service_up(self, msg): @@ -685,7 +624,7 @@ def _ctrl_service_up(self, msg): # def _prepare_env(self, env_name, env_spec): - self._log.debug('env_spec: %s', env_spec) + self._log.debug('env_spec %s: %s', env_name, env_spec) etype = env_spec.get('type', 'venv') evers = env_spec.get('version') diff --git a/src/radical/pilot/agent/executing/base.py b/src/radical/pilot/agent/executing/base.py index 9ae466a46f..263c2dd3ef 100644 --- a/src/radical/pilot/agent/executing/base.py +++ b/src/radical/pilot/agent/executing/base.py @@ -106,7 +106,6 @@ def initialize(self): rpc.AGENT_STAGING_OUTPUT_QUEUE) self.register_publisher (rpc.AGENT_UNSCHEDULE_PUBSUB) - self.register_subscriber(rpc.CONTROL_PUBSUB, self.control_cb) self._to_tasks = list() self._to_lock = mt.Lock() @@ -131,6 +130,7 @@ def control_cb(self, topic, msg): cmd = msg['cmd'] arg = msg['arg'] + # FIXME RPC: already handled in the component base class if cmd == 'cancel_tasks': self._log.info('cancel_tasks command (%s)', arg) diff --git a/src/radical/pilot/agent/executing/sleep.py b/src/radical/pilot/agent/executing/sleep.py index 94b44abfb7..5940ade971 100644 --- a/src/radical/pilot/agent/executing/sleep.py +++ b/src/radical/pilot/agent/executing/sleep.py @@ -157,6 +157,7 @@ def control_cb(self, topic, msg): cmd = msg['cmd'] + # FIXME RPC: already handled in the component base class if cmd == 'cancel_tasks': # FIXME: clarify how to cancel tasks diff --git 
a/src/radical/pilot/agent/scheduler/base.py b/src/radical/pilot/agent/scheduler/base.py index 51c45fbd1c..bff452efa7 100644 --- a/src/radical/pilot/agent/scheduler/base.py +++ b/src/radical/pilot/agent/scheduler/base.py @@ -316,7 +316,7 @@ def create(cls, cfg, session): # -------------------------------------------------------------------------- # - def _control_cb(self, topic, msg): + def control_cb(self, topic, msg): ''' listen on the control channel for raptor queue registration commands ''' @@ -386,6 +386,7 @@ def _control_cb(self, topic, msg): self.advance(tasks, state=rps.FAILED, publish=True, push=False) + # FIXME: RPC: this is caught in the base class handler already elif cmd == 'cancel_tasks': uids = arg['uids'] @@ -651,9 +652,6 @@ def _schedule_tasks(self): self._raptor_tasks = dict() # raptor_master_id : [task] self._raptor_lock = mt.Lock() # lock for the above - # subscribe to control messages, e.g., to register raptor queues - self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb) - # register task output channels self.register_output(rps.AGENT_EXECUTING_PENDING, rpc.AGENT_EXECUTING_QUEUE) diff --git a/src/radical/pilot/pilot.py b/src/radical/pilot/pilot.py index 21ea407125..8797d95e91 100644 --- a/src/radical/pilot/pilot.py +++ b/src/radical/pilot/pilot.py @@ -161,17 +161,11 @@ def __init__(self, pmgr: PilotManager, descr): # hook into the control pubsub for rpc handling self._rpc_reqs = dict() - self._ctrl_addr_sub = self._session._reg['bridges.control_pubsub.addr_sub'] - self._ctrl_addr_pub = self._session._reg['bridges.control_pubsub.addr_pub'] + ctrl_addr_sub = self._session._reg['bridges.control_pubsub.addr_sub'] - ru.zmq.Subscriber(rpc.CONTROL_PUBSUB, url=self._ctrl_addr_sub, - log=self._log, prof=self._prof, - cb=self._control_cb, topic=rpc.CONTROL_PUBSUB) - - self._ctrl_pub = ru.zmq.Publisher(rpc.CONTROL_PUBSUB, url=self._ctrl_addr_pub, - log=self._log, prof=self._prof) - - ru.zmq.test_pubsub(rpc.CONTROL_PUBSUB, self._ctrl_addr_pub, self._ctrl_addr_sub) + ru.zmq.Subscriber(rpc.CONTROL_PUBSUB, url=ctrl_addr_sub, + log=self._log, prof=self._prof, cb=self._control_cb, + topic=rpc.CONTROL_PUBSUB) # -------------------------------------------------------------------------- @@ -756,7 +750,7 @@ def _control_cb(self, topic, msg_data): # -------------------------------------------------------------------------- # - def rpc(self, cmd, args=None): + def rpc(self, cmd, args=None, kwargs=None): '''Remote procedure call. 
Send an RPC command and arguments to the pilot and wait for the
diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py
index e05de70de4..354bd176bc 100644
--- a/src/radical/pilot/pilot_manager.py
+++ b/src/radical/pilot/pilot_manager.py
@@ -161,9 +161,6 @@ def __init__(self, session, cfg='default'):
         # also listen to the state pubsub for pilot state changes
         self.register_subscriber(rpc.STATE_PUBSUB, self._state_sub_cb)
 
-        # also listen to the state control for pilot activation
-        self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_sub_cb)
-
         # let session know we exist
         self._session._register_pmgr(self)
 
@@ -311,7 +308,7 @@ def _state_sub_cb(self, topic, msg):
 
             if 'type' in thing and thing['type'] == 'pilot':
 
                 self._log.debug('state push: %s: %s %s', thing['uid'],
-                                thing['state'], thing.get('resources'))
+                                 thing['state'], thing.get('resources'))
 
                 # we got the state update from the state callback - don't
                 # publish it again
@@ -322,7 +319,7 @@
 
     # --------------------------------------------------------------------------
     #
-    def _control_sub_cb(self, topic, msg):
+    def control_cb(self, topic, msg):
 
         if self._terminate.is_set():
             return False
diff --git a/src/radical/pilot/pmgr/launching/base.py b/src/radical/pilot/pmgr/launching/base.py
index 93822d1f70..9a725beaf8 100644
--- a/src/radical/pilot/pmgr/launching/base.py
+++ b/src/radical/pilot/pmgr/launching/base.py
@@ -134,9 +134,6 @@ def __init__(self, cfg, session):
 
         self._stager_queue = self.get_output_ep(rpc.STAGER_REQUEST_QUEUE)
 
-        # we listen for pilot cancel commands
-        self.register_subscriber(rpc.CONTROL_PUBSUB, self._pmgr_control_cb)
-
         # also listen for completed staging directives
         self.register_subscriber(rpc.STAGER_RESPONSE_PUBSUB, self._staging_ack_cb)
         self._active_sds = dict()
@@ -220,7 +217,7 @@ def finalize(self):
 
     # --------------------------------------------------------------------------
     #
-    def _pmgr_control_cb(self, topic, msg):
+    def control_cb(self, topic, msg):
 
         cmd = msg['cmd']
         arg = msg['arg']
diff --git a/src/radical/pilot/raptor/master.py b/src/radical/pilot/raptor/master.py
index a3a7c16da5..46a1e29225 100644
--- a/src/radical/pilot/raptor/master.py
+++ b/src/radical/pilot/raptor/master.py
@@ -175,7 +175,6 @@ def __init__(self, cfg: ru.Config = None):
 
         # everything is set up - we can serve messages on the pubsubs also
         self.register_subscriber(rpc.STATE_PUBSUB, self._state_cb)
-        self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb)
 
         # and register that input queue with the scheduler
         self._log.debug('registered raptor queue: %s / %s', self._uid, qname)
@@ -214,7 +213,7 @@ def workers(self):
 
     # --------------------------------------------------------------------------
     #
-    def _control_cb(self, topic, msg):
+    def control_cb(self, topic, msg):
         '''
         listen for `worker_register`, `worker_unregister`,
         `worker_rank_heartbeat` and `rpc_req` messages.
@@ -275,6 +274,7 @@ def _control_cb(self, topic, msg): self._workers[uid]['status'] = self.DONE + # FIXME RPC elif cmd == 'rpc_req': if arg['tgt'] != self._uid: diff --git a/src/radical/pilot/task_manager.py b/src/radical/pilot/task_manager.py index 7ad8bbf193..a28406d50f 100644 --- a/src/radical/pilot/task_manager.py +++ b/src/radical/pilot/task_manager.py @@ -181,15 +181,10 @@ def __init__(self, session, cfg='default', scheduler=None): # hook into the control pubsub for rpc handling self._rpc_queue = queue.Queue() - ctrl_addr_sub = self._session._reg['bridges.control_pubsub.addr_sub'] ctrl_addr_pub = self._session._reg['bridges.control_pubsub.addr_pub'] - ru.zmq.Subscriber(rpc.CONTROL_PUBSUB, url=ctrl_addr_sub, - log=self._log, prof=self._prof, - cb=self._control_cb, topic=rpc.CONTROL_PUBSUB) - - self._ctrl_pub = ru.zmq.Publisher(rpc.CONTROL_PUBSUB, url=ctrl_addr_pub, - log=self._log, prof=self._prof) + self._ctrl_pub = ru.zmq.Publisher(rpc.CONTROL_PUBSUB, url=ctrl_addr_pub, + log=self._log, prof=self._prof) self._prof.prof('setup_done', uid=self._uid) @@ -641,7 +636,8 @@ def remove_pilots(self, pilot_ids, drain=False): # -------------------------------------------------------------------------- # - def _control_cb(self, topic, msg): + # FIXME RPC + def control_cb(self, topic, msg): cmd = msg['cmd'] arg = msg['arg'] diff --git a/src/radical/pilot/tmgr/scheduler/base.py b/src/radical/pilot/tmgr/scheduler/base.py index ca4cf1ba2b..7b308d4420 100644 --- a/src/radical/pilot/tmgr/scheduler/base.py +++ b/src/radical/pilot/tmgr/scheduler/base.py @@ -70,10 +70,6 @@ def initialize(self): # don't. Either way, we here subscribe to state updates. self.register_subscriber(rpc.STATE_PUBSUB, self._base_state_cb) - # Schedulers use that command channel to get information about - # pilots being added or removed. - self.register_subscriber(rpc.CONTROL_PUBSUB, self._base_control_cb) - # cache the local client sandbox to avoid repeated os calls self._client_sandbox = os.getcwd() @@ -199,7 +195,7 @@ def update_tasks(self, tasks): # -------------------------------------------------------------------------- # - def _base_control_cb(self, topic, msg): + def control_cb(self, topic, msg): # we'll wait for commands from the tmgr, to learn about pilots we can # use or we should stop using. We also track task cancelation, as all @@ -209,13 +205,13 @@ def _base_control_cb(self, topic, msg): cmd = msg['cmd'] - self._log.debug('got cmd %s', cmd) + self._log.debug('=== got cmd %s', cmd) if cmd not in ['add_pilots', 'remove_pilots', 'cancel_tasks']: return True - arg = msg['arg'] - tmgr = arg['tmgr'] + arg = msg['arg'] + tmgr = arg['tmgr'] self._log.info('scheduler command: %s: %s' % (cmd, arg)) diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py index 6bc4cefd87..d49d2780d6 100644 --- a/src/radical/pilot/tmgr/staging_input/default.py +++ b/src/radical/pilot/tmgr/staging_input/default.py @@ -68,10 +68,6 @@ def initialize(self): self.register_output(rps.AGENT_STAGING_INPUT_PENDING, rpc.PROXY_TASK_QUEUE) - # we subscribe to the command channel to learn about pilots being added - # to this task manager. 
- self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb) - self._mkdir_threshold = self.cfg.get('task_bulk_mkdir_threshold', TASK_BULK_MKDIR_THRESHOLD) @@ -86,7 +82,7 @@ def finalize(self): # -------------------------------------------------------------------------- # - def _control_cb(self, topic, msg): + def control_cb(self, topic, msg): # keep track of `add_pilots` commands and updates self._pilots # accordingly. @@ -131,6 +127,9 @@ def _advance_tasks(self, tasks, pid=None, state=None, push=True): # perform and publish state update # push to the proxy queue + for task in tasks: + self._log.debug('====== push to proxy: %s', task['uid']) + self.advance(tasks, state, publish=True, push=push, qname=pid) diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index 6f6a53c813..84f3361953 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -4,7 +4,9 @@ # pylint: disable=global-statement # W0603 global `_components` +import io import os +import sys import copy import time @@ -14,7 +16,7 @@ from .. import constants as rpc from .. import states as rps -from .rpc_helper import RPCHelper +from ..messages import RPCRequestMessage, RPCResultMessage # ------------------------------------------------------------------------------ @@ -184,15 +186,15 @@ def __init__(self, cfg, session): self._reg = self._session._reg - self._inputs = dict() # queues to get things from - self._outputs = dict() # queues to send things to - self._workers = dict() # methods to work on things - self._publishers = dict() # channels to send notifications to - self._threads = dict() # subscriber and idler threads - self._cb_lock = mt.RLock() # guard threaded callback invokations - self._work_lock = mt.RLock() # guard threaded callback invokations - - self._subscribers = dict() # ZMQ Subscriber classes + self._inputs = dict() # queues to get things from + self._outputs = dict() # queues to send things to + self._workers = dict() # methods to work on things + self._publishers = dict() # channels to send notifications to + self._threads = dict() # subscriber and idler threads + self._cb_lock = mt.RLock() # guard threaded callback invokations + self._rpc_lock = mt.RLock() # guard threaded rpc calls + self._rpc_handlers = dict() # RPC handler methods + self._subscribers = dict() # ZMQ Subscriber classes if self._owner == self.uid: self._owner = 'root' @@ -310,10 +312,25 @@ def __str__(self): # -------------------------------------------------------------------------- # - def _cancel_monitor_cb(self, topic, msg): + def control_cb(self, topic, msg): + ''' + This callback can be overloaded by the component to handle any control + message which is not already handled by the component base class. + ''' + cmd = msg['cmd'] + self._log.debug('=== got cmd %s - ignored', cmd) + pass + + + # -------------------------------------------------------------------------- + # + def _control_cb(self, topic, msg): ''' We listen on the control channel for cancel requests, and append any - found UIDs to our cancel list. + found UIDs to our cancel list. We also listen for RPC requests and + handle any registered RPC handlers. All other control messages are + passed on to the `control_cb` handler which can be overloaded by + component implementations. ''' # FIXME: We do not check for types of things to cancel - the UIDs are @@ -321,7 +338,14 @@ def _cancel_monitor_cb(self, topic, msg): # currently have no abstract 'cancel' command, but instead use # 'cancel_tasks'. 
-        self._log.debug_9('command incoming: %s', msg)
+        # try to handle message as RPC message
+        if self._handle_zmq_msg(msg):
+
+            # handled successfully
+            return
+
+        # handle any other message types
+        self._log.debug('=== command incoming: %s', msg)
 
         cmd = msg['cmd']
         arg = msg['arg']
@@ -338,16 +362,88 @@
         with self._cancel_lock:
             self._cancel_list += uids
 
-        if cmd == 'terminate':
+        elif cmd == 'terminate':
             self._log.info('got termination command')
             self.stop()
 
-        # else:
-        #     self._log.debug('command ignored: %s', cmd)
+        else:
+            self._log.debug('command handled by implementation: %s', cmd)
+            self.control_cb(topic, msg)
 
         return True
 
+    # --------------------------------------------------------------------------
+    #
+    def _handle_zmq_msg(self, msg_data):
+
+        try:
+            msg = ru.zmq.Message.deserialize(msg_data)
+            self._log.debug('deserialized msg type: %s', type(msg))
+
+        except Exception:
+            self._log.debug('no zmq msg type: %s', msg_data)
+            return False
+
+        if isinstance(msg, RPCRequestMessage):
+            self._handle_rpc_msg(msg)
+            return True
+
+        else:
+            # we do not handle other message types right now
+            return False
+
+
+    # --------------------------------------------------------------------------
+    #
+    def _handle_rpc_msg(self, msg):
+
+        bakout = sys.stdout
+        bakerr = sys.stderr
+
+        strout = None
+        strerr = None
+
+        val = None
+        out = None
+        err = None
+        exc = None
+
+        if msg.cmd not in self._rpc_handlers:
+            # this RPC message is *silently* ignored
+            self._log.debug('no rpc handler for [%s]', msg.cmd)
+            return
+
+        try:
+            self._log.debug('rpc handler for %s: %s(%s, %s)', msg.cmd,
+                            self._rpc_handlers[msg.cmd],
+                            msg.args, msg.kwargs)
+
+            sys.stdout = strout = io.StringIO()
+            sys.stderr = strerr = io.StringIO()
+
+            val = self._rpc_handlers[msg.cmd](*msg.args, **msg.kwargs)
+            out = strout.getvalue()
+            err = strerr.getvalue()
+
+        except Exception as e:
+            self._log.exception('rpc call failed: %s' % (msg))
+            val = None
+            out = strout.getvalue()
+            err = strerr.getvalue()
+            exc = (repr(e), '\n'.join(ru.get_exception_trace()))
+
+        finally:
+            # restore stdio
+            sys.stdout = bakout
+            sys.stderr = bakerr
+
+        rep = RPCResultMessage(rpc_req=msg, val=val, out=out, err=err, exc=exc)
+        self._log.debug('rpc reply: %s', rep)
+
+        return rep
+
 
     # --------------------------------------------------------------------------
     #
     @property
@@ -379,10 +475,10 @@ def _initialize(self):
         self.register_publisher(rpc.STATE_PUBSUB)
         self.register_publisher(rpc.CONTROL_PUBSUB)
 
-        # set controller callback to handle cancellation requests
+        # set controller callback to handle cancellation requests and RPCs
        self._cancel_list = list()
        self._cancel_lock = mt.RLock()
-        self.register_subscriber(rpc.CONTROL_PUBSUB, self._cancel_monitor_cb)
+        self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb)
 
         # call component level initialize
         self.initialize()
@@ -875,8 +971,7 @@ def work_cb(self):
 
       #         for thing in things:
       #             self._log.debug('got %s (%s)', thing['uid'], state)
 
-            with self._work_lock:
-                self._workers[state](things)
+            self._workers[state](things)
 
         except Exception as e:
 
diff --git a/src/radical/pilot/utils/component_manager.py b/src/radical/pilot/utils/component_manager.py
index bfa7a3d2c0..467a512c87 100644
--- a/src/radical/pilot/utils/component_manager.py
+++ b/src/radical/pilot/utils/component_manager.py
@@ -72,6 +72,7 @@ def __init__(self, sid, reg_addr, owner):
     #
     def _hb_msg_cb(self, topic, msg):
 
+        self._log.debug('==== got hb msg %s', msg)
         hb_msg = HeartbeatMessage(from_dict=msg)
self._heartbeats[hb_msg.uid] = time.time() @@ -103,6 +104,8 @@ def _wait_startup(self, uids, timeout): time.sleep(0.25) + self._log.debug('===== wait for done: %s', ok) + # -------------------------------------------------------------------------- # diff --git a/src/radical/pilot/utils/rpc_helper.py b/src/radical/pilot/utils/rpc_helper.py index b3ba63531f..f046e83653 100644 --- a/src/radical/pilot/utils/rpc_helper.py +++ b/src/radical/pilot/utils/rpc_helper.py @@ -25,8 +25,9 @@ class RPCHelper(object): # -------------------------------------------------------------------------- # - def __init__(self, ctrl_addr_pub, ctrl_addr_sub, log, prof): + def __init__(self, owner, ctrl_addr_pub, ctrl_addr_sub, log, prof): + self._owner = owner # used for uid scope self._addr_pub = ctrl_addr_pub self._addr_sub = ctrl_addr_sub @@ -52,7 +53,7 @@ def __init__(self, ctrl_addr_pub, ctrl_addr_sub, log, prof): # def request(self, cmd, *args, **kwargs): - rid = ru.generate_id('rpc') + rid = ru.generate_id('%s.rpc' % self._owner) req = RPCRequestMessage(uid=rid, cmd=cmd, args=args, kwargs=kwargs) self._active = rid @@ -117,7 +118,7 @@ def _work(self): with self._lock: if msg.cmd in self._handlers: - rep = self._handle_request(msg) + rep = self.handle_request(msg) pub.put(CONTROL_PUBSUB, rep) else: self._log.debug_2('no rpc handler for %s', msg.cmd) @@ -132,47 +133,6 @@ def _work(self): self._queue.put(msg) - # -------------------------------------------------------------------------- - # - def _handle_request(self, msg): - - bakout = sys.stdout - bakerr = sys.stderr - - strout = None - strerr = None - - val = None - out = None - err = None - exc = None - - try: - self._log.debug_2('rpc handler: %s(%s, %s)', - self._handlers[msg.cmd], *msg.args, **msg.kwargs) - - sys.stdout = strout = io.StringIO() - sys.stderr = strerr = io.StringIO() - - val = self._handlers[msg.cmd](*msg.args, **msg.kwargs) - out = strout.getvalue() - err = strerr.getvalue() - - except Exception as e: - self._log.exception('rpc call failed: %s' % (msg)) - val = None - out = strout.getvalue() - err = strerr.getvalue() - exc = (repr(e), '\n'.join(ru.get_exception_trace())) - - finally: - # restore stdio - sys.stdout = bakout - sys.stderr = bakerr - - return RPCResultMessage(rpc_req=msg, val=val, out=out, err=err, exc=exc) - - # -------------------------------------------------------------------------- # def add_handler(self, cmd, handler): From a5e65328fd884a4fb4ddb792b346d5f94c23577d Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Mon, 18 Sep 2023 17:39:16 +0200 Subject: [PATCH 163/171] fix RPC calls, fix log level setting --- src/radical/pilot/agent/agent_0.py | 30 +++--- src/radical/pilot/agent/scheduler/base.py | 4 + src/radical/pilot/pilot.py | 42 ++++---- src/radical/pilot/pilot_manager.py | 2 +- src/radical/pilot/pmgr/launching/base.py | 3 +- src/radical/pilot/proxy.py | 1 + src/radical/pilot/raptor/worker.py | 1 + src/radical/pilot/session.py | 50 ++++----- src/radical/pilot/tmgr/scheduler/base.py | 2 +- .../pilot/tmgr/staging_input/default.py | 2 +- src/radical/pilot/utils/component.py | 101 +++++++++++++----- src/radical/pilot/utils/component_manager.py | 11 +- 12 files changed, 151 insertions(+), 98 deletions(-) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index fe4ae4ac39..df11cdd5c4 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -82,7 +82,7 @@ def __init__(self): # def _proxy_input_cb(self, msg): - self._log.debug('====== proxy input cb: %s', 
len(msg)) + self._log.debug_8('proxy input cb: %s', len(msg)) to_advance = list() @@ -202,16 +202,7 @@ def initialize(self): self.register_output(rps.TMGR_STAGING_OUTPUT_PENDING, rpc.PROXY_TASK_QUEUE) - # hook into the control pubsub for rpc handling - ctrl_addr_pub = self._session._reg['bridges.control_pubsub.addr_pub'] - ctrl_addr_sub = self._session._reg['bridges.control_pubsub.addr_sub'] - - self._rpc_helper = rpu.RPCHelper(owner=self._uid, - ctrl_addr_pub=ctrl_addr_pub, - ctrl_addr_sub=ctrl_addr_sub, - log=self._log, prof=self._prof) - - self._rpc_helper.add_handler('prepare_env', self._prepare_env) + self.register_rpc_handler('prepare_env', self._prepare_env) # before we run any tasks, prepare a named_env `rp` for tasks which use # the pilot's own environment, such as raptors @@ -222,7 +213,9 @@ def initialize(self): 'export PATH=%s' % os.environ.get('PATH', '')] } - self._rpc_helper.request('prepare_env', env_name='rp', env_spec=env_spec) + + + self.rpc('prepare_env', env_name='rp', env_spec=env_spec) # start any services if they are requested self._start_services() @@ -232,6 +225,8 @@ def initialize(self): rm_info = self._rm.info n_nodes = len(rm_info['node_list']) + self._log.debug('advance to PMGR_ACTIVE') + pilot = {'$all' : True, # pass full info to client side 'type' : 'pilot', 'uid' : self._pid, @@ -545,7 +540,7 @@ def control_cb(self, topic, msg): requests to handle. ''' - self._log.debug('==== %s: %s', topic, msg) + self._log.debug_1('control msg %s: %s', topic, msg) cmd = msg['cmd'] arg = msg['arg'] @@ -553,7 +548,6 @@ def control_cb(self, topic, msg): self._log.debug('pilot command: %s: %s', cmd, arg) self._prof.prof('cmd', msg="%s : %s" % (cmd, arg), uid=self._pid) - if cmd == 'pmgr_heartbeat' and arg['pmgr'] == self._pmgr: self._session._hb.beat(uid=self._pmgr) return True @@ -598,17 +592,17 @@ def _ctrl_service_up(self, msg): if uid not in self._service_uids_launched: # we do not know this service instance - self._log.warn('=== ignore service startup signal for %s', uid) + self._log.warn('ignore service startup signal for %s', uid) return True if uid in self._service_uids_running: - self._log.warn('=== duplicated service startup signal for %s', uid) + self._log.warn('duplicated service startup signal for %s', uid) return True - self._log.debug('=== service startup message for %s', uid) + self._log.debug('service startup message for %s', uid) self._service_uids_running.append(uid) - self._log.debug('=== service %s started (%s / %s)', uid, + self._log.debug('service %s started (%s / %s)', uid, len(self._service_uids_running), len(self._service_uids_launched)) diff --git a/src/radical/pilot/agent/scheduler/base.py b/src/radical/pilot/agent/scheduler/base.py index bff452efa7..1ce47ba1bd 100644 --- a/src/radical/pilot/agent/scheduler/base.py +++ b/src/radical/pilot/agent/scheduler/base.py @@ -326,6 +326,7 @@ def control_cb(self, topic, msg): if cmd == 'register_named_env': + env_name = arg['env_name'] self._named_envs.append(env_name) @@ -656,6 +657,9 @@ def _schedule_tasks(self): self.register_output(rps.AGENT_EXECUTING_PENDING, rpc.AGENT_EXECUTING_QUEUE) + # re-register the control callback in this subprocess + self.register_subscriber(rpc.CONTROL_PUBSUB, self._control_cb) + self._publishers = dict() self.register_publisher(rpc.STATE_PUBSUB) diff --git a/src/radical/pilot/pilot.py b/src/radical/pilot/pilot.py index 8797d95e91..c0f34566ca 100644 --- a/src/radical/pilot/pilot.py +++ b/src/radical/pilot/pilot.py @@ -162,6 +162,10 @@ def __init__(self, pmgr: PilotManager, 
                          descr):
 
         # hook into the control pubsub for rpc handling
         self._rpc_reqs = dict()
         ctrl_addr_sub = self._session._reg['bridges.control_pubsub.addr_sub']
+        ctrl_addr_pub = self._session._reg['bridges.control_pubsub.addr_pub']
+
+        self._ctrl_pub = ru.zmq.Publisher(rpc.CONTROL_PUBSUB, url=ctrl_addr_pub,
+                                          log=self._log, prof=self._prof)
 
         ru.zmq.Subscriber(rpc.CONTROL_PUBSUB, url=ctrl_addr_sub,
                           log=self._log, prof=self._prof, cb=self._control_cb,
@@ -700,8 +704,7 @@ def prepare_env(self, env_name, env_spec):
         """
 
-        self.rpc('prepare_env', {'env_name': env_name,
-                                 'env_spec': env_spec})
+        self.rpc('prepare_env', env_name=env_name, env_spec=env_spec)
 
 
     # --------------------------------------------------------------------------
@@ -738,19 +741,22 @@ def _control_cb(self, topic, msg_data):
 
         try:
             msg = ru.zmq.Message.deserialize(msg_data)
-            self._log.debug('=== rpc res: %s', msg)
 
-            if msg.uid in self._rpc_reqs:
-                self._rpc_reqs[msg.uid]['res'] = msg
-                self._rpc_reqs[msg.uid]['evt'].set()
+            if isinstance(msg, RPCResultMessage):
+
+                self._log.debug_4('handle rpc result %s', msg)
+
+                if msg.uid in self._rpc_reqs:
+                    self._rpc_reqs[msg.uid]['res'] = msg
+                    self._rpc_reqs[msg.uid]['evt'].set()
 
         except:
-            self._log.debug('=== ignore msg %s', msg_data)
+            pass
 
 
     # --------------------------------------------------------------------------
     #
-    def rpc(self, cmd, args=None, kwargs=None):
+    def rpc(self, cmd, *args, **kwargs):
         '''Remote procedure call.
 
         Send an RPC command and arguments to the pilot and wait for the
         response. This is a synchronous operation at this point, and it is not
         thread safe to have multiple concurrent RPC calls.
         '''
 
-        self._log.debug('=== pilot in %s state', self.state)
+        # RPCs can only be handled in `PMGR_ACTIVE` state
+        # FIXME: RPCs will hang forever if the pilot dies after sending the msg
         self.wait(rps.PMGR_ACTIVE)
-        self._log.debug('=== pilot now in %s state', self.state)
-
-        if not args:
-            args = dict()
 
         rpc_id  = ru.generate_id('%s.rpc' % self._uid)
-        rpc_req = RPCRequestMessage(uid=rpc_id, cmd=cmd, args=args)
+        rpc_req = RPCRequestMessage(uid=rpc_id, cmd=cmd, args=args,
+                                    kwargs=kwargs)
 
         self._rpc_reqs[rpc_id] = {
                 'req': rpc_req,
@@ -775,18 +779,18 @@
                 'time': time.time(),
         }
 
-        self._log.debug('=== wait for rpc request %s', rpc_req)
+        self._ctrl_pub.put(rpc.CONTROL_PUBSUB, rpc_req)
+
         while True:
 
-            if not self._rpc_reqs[rpc_id]['evt'].wait(timeout=10):
-                self._log.debug('=== still waiting for rpc request %s', rpc_id)
+            if not self._rpc_reqs[rpc_id]['evt'].wait(timeout=60):
+                self._log.debug('still waiting for rpc request %s', rpc_id)
                 continue
 
             rpc_res = self._rpc_reqs[rpc_id]['res']
-            self._log.debug('=== rpc result: %s', rpc_res)
 
             if rpc_res.exc:
-                raise RuntimeError('=== rpc failed: %s' % rpc_res.exc)
+                raise RuntimeError('rpc failed: %s' % rpc_res.exc)
 
             return rpc_res.val
 
diff --git a/src/radical/pilot/pilot_manager.py b/src/radical/pilot/pilot_manager.py
index 354bd176bc..63c1428616 100644
--- a/src/radical/pilot/pilot_manager.py
+++ b/src/radical/pilot/pilot_manager.py
@@ -832,7 +832,7 @@ def cancel_pilots(self, uids=None, _timeout=None):
         # send the cancellation request to the pilots
         # FIXME: MongoDB
       # self._session._dbs.pilot_command('cancel_pilot', [], uids)
-        self._log.debug('=== issue cancel_pilots for %s', uids)
+        self._log.debug('issue cancel_pilots for %s', uids)
         self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'cancel_pilots',
                                           'arg' : {'pmgr' : self.uid,
                                                    'uids' : uids}})
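With the reworked `Pilot.rpc` above, positional and keyword arguments are
forwarded verbatim to the agent side handler. A minimal usage sketch, assuming
a submitted pilot; the environment name `my_env` and the spec contents are
illustrative placeholders, not part of this patch:

    # blocks until the pilot is PMGR_ACTIVE, then round-trips over the
    # control pubsub and returns the remote handler's return value
    pilot.rpc('prepare_env', env_name='my_env',
                             env_spec={'type' : 'venv',
                                       'setup': ['psutil']})

    # the convenience wrapper changed in the hunk above is equivalent:
    pilot.prepare_env(env_name='my_env',
                      env_spec={'type' : 'venv',
                                'setup': ['psutil']})

If the remote handler raised, the recorded exception is re-raised locally as a
`RuntimeError`.

diff --git a/src/radical/pilot/pmgr/launching/base.py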
b/src/radical/pilot/pmgr/launching/base.py index 9a725beaf8..525cb6874e 100644 --- a/src/radical/pilot/pmgr/launching/base.py +++ b/src/radical/pilot/pmgr/launching/base.py @@ -886,7 +886,8 @@ def _prepare_pilot(self, resource, rcfg, pilot, expand, tar_name): agent_cfg['task_post_launch'] = task_post_launch agent_cfg['task_post_exec'] = task_post_exec agent_cfg['resource_cfg'] = copy.deepcopy(rcfg) - agent_cfg['debug'] = self._log.getEffectiveLevel() + agent_cfg['log_lvl'] = self._log.level + agent_cfg['debug_lvl'] = self._log.debug_level agent_cfg['services'] = services pilot['cfg'] = agent_cfg diff --git a/src/radical/pilot/proxy.py b/src/radical/pilot/proxy.py index 777545b03c..88c028934d 100644 --- a/src/radical/pilot/proxy.py +++ b/src/radical/pilot/proxy.py @@ -231,6 +231,7 @@ def _register(self, arg): # def _worker(self, sid, q, term): + # FIXME: log level etc log = ru.Logger('radical.pilot.bridge', level='debug', path=sid) proxy_cp = None diff --git a/src/radical/pilot/raptor/worker.py b/src/radical/pilot/raptor/worker.py index 9fb85d468b..8638e4ae04 100644 --- a/src/radical/pilot/raptor/worker.py +++ b/src/radical/pilot/raptor/worker.py @@ -45,6 +45,7 @@ def __init__(self, manager, rank, raptor_id): self._log = ru.Logger(name=self._uid, ns='radical.pilot.worker', level=self._cfg.log_lvl, + debug=self._cfg.debug_lvl, targets=self._cfg.log_tgt, path=self._cfg.path) self._prof = ru.Profiler(name=self._uid, ns='radical.pilot.worker', diff --git a/src/radical/pilot/session.py b/src/radical/pilot/session.py index 19ee982565..8b898f9b9e 100644 --- a/src/radical/pilot/session.py +++ b/src/radical/pilot/session.py @@ -186,29 +186,10 @@ def __init__(self, proxy_url: Optional[str ] = None, # initialization is different for each session type # NOTE: we could refactor this to session sub-classes - if self._role == self._PRIMARY: - - # if user did not set a uid, we need to generate a new ID - if not self._uid: - self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE) - - self._init_primary() - - - elif self._role == self._AGENT_0: - - self._init_agent_0() - - - elif self._role == self._AGENT_N: - - self._init_agent_n() - - - else: - - self._init_default() - + if self._role == self._PRIMARY: self._init_primary() + elif self._role == self._AGENT_0: self._init_agent_0() + elif self._role == self._AGENT_N: self._init_agent_n() + else : self._init_default() # now we have config and uid - initialize base class (saga session) rs.Session.__init__(self, uid=self._uid) @@ -244,6 +225,10 @@ def _init_primary(self): # - pushes bridge and component configs into that registry # - starts a ZMQ proxy (or ensures one is up and running) + # if user did not set a uid, we need to generate a new ID + if not self._uid: + self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE) + # we still call `_init_cfg` to complete missing config settings # FIXME: completion only needed by `PRIMARY` self._init_cfg_from_scratch() @@ -434,7 +419,8 @@ def _init_cfg_from_scratch(self): self._prof = self._get_profiler(name=self._uid) self._rep = self._get_reporter(name=self._uid) self._log = self._get_logger (name=self._uid, - level=self._cfg.get('debug')) + level=self._cfg.get('log_lvl'), + debug=self._cfg.get('debug_lvl')) from . 
import version_detail as rp_version_detail self._log.info('radical.pilot version: %s', rp_version_detail) @@ -482,7 +468,8 @@ def _init_cfg_from_dict(self): self._prof = self._get_profiler(name=self._uid) self._rep = self._get_reporter(name=self._uid) self._log = self._get_logger (name=self._uid, - level=self._cfg.get('debug')) + level=self._cfg.get('log_lvl'), + debug=self._cfg.get('debug_lvl')) from . import version_detail as rp_version_detail self._log.info('radical.pilot version: %s', rp_version_detail) @@ -511,7 +498,8 @@ def _init_cfg_from_registry(self): self._prof = self._get_profiler(name=self._uid) self._rep = self._get_reporter(name=self._uid) self._log = self._get_logger (name=self._uid, - level=self._cfg.get('debug')) + level=self._cfg.get('log_lvl'), + debug=self._cfg.get('debug_lvl')) from . import version_detail as rp_version_detail self._log.info('radical.pilot version: %s', rp_version_detail) @@ -576,7 +564,7 @@ def _hb_term_cb(hb_uid): # -------------------------------------- # create heartbeat manager which monitors all components in this session - # self._log.debug('=== hb %s from session', self._uid) + # self._log.debug('hb %s from session', self._uid) self._hb = ru.Heartbeat(uid=self._uid, timeout=self._cfg.heartbeat.timeout, interval=self._cfg.heartbeat.interval, @@ -1032,14 +1020,16 @@ def cmgr(self): # -------------------------------------------------------------------------- # - def _get_logger(self, name, level=None): + def _get_logger(self, name, level=None, debug=None): """Get the Logger instance. This is a thin wrapper around `ru.Logger()` which makes sure that log files end up in a separate directory with the name of `session.uid`. """ - return ru.Logger(name=name, ns='radical.pilot', path=self._cfg.path, - targets=['.'], level=level) + log = ru.Logger(name=name, ns='radical.pilot', path=self._cfg.path, + targets=['.'], level=level, debug=debug) + + return log # -------------------------------------------------------------------------- diff --git a/src/radical/pilot/tmgr/scheduler/base.py b/src/radical/pilot/tmgr/scheduler/base.py index 7b308d4420..4403fcdbc8 100644 --- a/src/radical/pilot/tmgr/scheduler/base.py +++ b/src/radical/pilot/tmgr/scheduler/base.py @@ -205,7 +205,7 @@ def control_cb(self, topic, msg): cmd = msg['cmd'] - self._log.debug('=== got cmd %s', cmd) + self._log.debug('got cmd %s', cmd) if cmd not in ['add_pilots', 'remove_pilots', 'cancel_tasks']: return True diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py index d49d2780d6..fa2783f412 100644 --- a/src/radical/pilot/tmgr/staging_input/default.py +++ b/src/radical/pilot/tmgr/staging_input/default.py @@ -128,7 +128,7 @@ def _advance_tasks(self, tasks, pid=None, state=None, push=True): # perform and publish state update # push to the proxy queue for task in tasks: - self._log.debug('====== push to proxy: %s', task['uid']) + self._log.debug_8('push to proxy: %s', task['uid']) self.advance(tasks, state, publish=True, push=push, qname=pid) diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index 84f3361953..1d2f2fc47e 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -179,7 +179,6 @@ def __init__(self, cfg, session): assert self._uid, 'Component needs a uid (%s)' % type(self) # state we carry over the fork - self._debug = self._cfg.get('debug') self._owner = self._cfg.get('owner', self.uid) self._ctype = "%s.%s" % (self.__class__.__module__, 
                                  self.__class__.__name__)
@@ -193,6 +192,7 @@ def __init__(self, cfg, session):
 
         self._threads      = dict()      # subscriber and idler threads
         self._cb_lock      = mt.RLock()  # guard threaded callback invocations
         self._rpc_lock     = mt.RLock()  # guard threaded rpc calls
+        self._rpc_reqs     = dict()      # currently active RPC requests
         self._rpc_handlers = dict()      # RPC handler methods
 
         self._subscribers  = dict()      # ZMQ Subscriber classes
@@ -201,7 +201,8 @@ def __init__(self, cfg, session):
 
         self._prof = self._session._get_profiler(name=self.uid)
         self._log  = self._session._get_logger  (name=self.uid,
-                                                 level=self._debug)
+                                                 level=self._cfg.get('log_lvl'),
+                                                 debug=self._cfg.get('debug_lvl'))
 
         self._q    = None
         self._in   = None
@@ -317,8 +318,6 @@ def control_cb(self, topic, msg):
         This callback can be overloaded by the component to handle any control
         message which is not already handled by the component base class.
         '''
-        cmd = msg['cmd']
-        self._log.debug('=== got cmd %s - ignored', cmd)
         pass
 
 
@@ -339,13 +338,17 @@ def _control_cb(self, topic, msg):
         # 'cancel_tasks'.
 
         # try to handle message as RPC message
-        if self._handle_zmq_msg(msg):
-
+        try:
+            self._handle_zmq_msg(msg)
             # handled successfully
             return
 
+        except:
+            # could not be handled - fall through to legacy handlers
+            pass
+
         # handle any other message types
-        self._log.debug('=== command incoming: %s', msg)
+        self._log.debug_5('command incoming: %s', msg)
 
         cmd = msg['cmd']
         arg = msg['arg']
@@ -367,7 +370,7 @@ def _control_cb(self, topic, msg):
             self.stop()
 
         else:
-            self._log.debug('command handled by implementation: %s', cmd)
+            self._log.debug_1('command handled by implementation: %s', cmd)
             self.control_cb(topic, msg)
 
         return True
@@ -377,27 +380,30 @@ def _control_cb(self, topic, msg):
     #
     def _handle_zmq_msg(self, msg_data):
 
-        try:
-            msg = ru.zmq.Message.deserialize(msg_data)
-            self._log.debug('deserialized msg type: %s', type(msg))
-
-        except Exception as e:
-            self._log.debug('no zmq msg type: %s', msg_data)
-            return False
+        msg = ru.zmq.Message.deserialize(msg_data)
 
         if isinstance(msg, RPCRequestMessage):
+            self._log.debug_4('handle rpc request %s', msg)
             self._handle_rpc_msg(msg)
-            return True
+
+        elif isinstance(msg, RPCResultMessage):
+
+            if msg.uid in self._rpc_reqs:
+                self._log.debug_4('handle rpc result %s', msg)
+                self._rpc_reqs[msg.uid]['res'] = msg
+                self._rpc_reqs[msg.uid]['evt'].set()
 
         else:
-            # we do not handle other message types right now
-            return False
 
+            raise ValueError('message type not handled')
+
 
     # --------------------------------------------------------------------------
     #
     def _handle_rpc_msg(self, msg):
 
+        self._log.debug('handle rpc request: %s', msg)
+
         bakout = sys.stdout
         bakerr = sys.stderr
@@ -415,9 +421,8 @@ def _handle_rpc_msg(self, msg):
             return
 
         try:
-            self._log.debug('rpc handler for %s: %s(%s, %s)', msg.cmd,
-                            self._rpc_handlers[msg.cmd],
-                            *msg.args, **msg.kwargs)
+            self._log.debug('rpc handler for %s: %s',
+                            msg.cmd, self._rpc_handlers[msg.cmd])
 
             sys.stdout = strout = io.StringIO()
             sys.stderr = strerr = io.StringIO()
@@ -438,10 +443,58 @@ def _handle_rpc_msg(self, msg):
             sys.stdout = bakout
             sys.stderr = bakerr
 
-        rep = RPCResultMessage(rpc_req=msg, val=val, out=out, err=err, exc=exc)
-        self._log.debug('rpc reply: %s', rep)
+        rpc_rep = RPCResultMessage(rpc_req=msg, val=val, out=out, err=err, exc=exc)
+        self._log.debug_3('rpc reply: %s', rpc_rep)
+
+        self.publish(rpc.CONTROL_PUBSUB, rpc_rep)
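The published `RPCResultMessage` carries the handler's return value, the
captured stdout/stderr, and any exception, so a component can expose methods
to its peers via `register_rpc_handler` below. A minimal sketch, assuming a
`Component` subclass; the `echo` command and its handler are illustrative
placeholders, not part of this patch:

    class MyComponent(rpu.Component):

        def initialize(self):
            # advertise the handler; peers address it by command name
            self.register_rpc_handler('echo', self._echo)

        def _echo(self, text):
            print('echoing')    # captured into the result's `out` field
            return text         # returned as the result's `val` field

A peer then calls `self.rpc('echo', 'hello')` and receives `'hello'` back, or
a `RuntimeError` wrapping the remote exception trace.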
+
+
+    # --------------------------------------------------------------------------
+    #
+    def register_rpc_handler(self, cmd, handler):
+
+        self._rpc_handlers[cmd] = handler
+
+
+    # --------------------------------------------------------------------------
+    #
+    def rpc(self, cmd, *args, **kwargs):
+        '''Remote procedure call.
+
+        Send an RPC command and arguments to the control pubsub and wait for the
+        response. This is a synchronous operation at this point, and it is not
+        thread safe to have multiple concurrent RPC calls.
+        '''
+
+        self._log.debug_5('rpc call %s(%s, %s)', cmd, args, kwargs)
+
+        rpc_id  = ru.generate_id('%s.rpc' % self._uid)
+        rpc_req = RPCRequestMessage(uid=rpc_id, cmd=cmd,
+                                    args=args, kwargs=kwargs)
+
+        self._rpc_reqs[rpc_id] = {
+                'req': rpc_req,
+                'res': None,
+                'evt': mt.Event(),
+                'time': time.time(),
+        }
+        self.publish(rpc.CONTROL_PUBSUB, rpc_req)
+
+        while True:
+
+            if not self._rpc_reqs[rpc_id]['evt'].wait(timeout=60):
+                self._log.debug_4('still waiting for rpc request %s', rpc_id)
+                continue
+
+            rpc_res = self._rpc_reqs[rpc_id]['res']
+            self._log.debug_4('rpc result: %s', rpc_res)
+
+            del self._rpc_reqs[rpc_id]
+
+            if rpc_res.exc:
+                raise RuntimeError('rpc failed: %s' % rpc_res.exc)
 
-        return rep
+            return rpc_res.val
 
 
     # --------------------------------------------------------------------------

diff --git a/src/radical/pilot/utils/component_manager.py b/src/radical/pilot/utils/component_manager.py
index 467a512c87..3319e03f4d 100644
--- a/src/radical/pilot/utils/component_manager.py
+++ b/src/radical/pilot/utils/component_manager.py
@@ -51,7 +51,9 @@ def __init__(self, sid, reg_addr, owner):
         self._prof = ru.Profiler(self._uid, ns='radical.pilot',
                                  path=self._cfg.path)
         self._log  = ru.Logger(self._uid, ns='radical.pilot',
-                               path=self._cfg.path)
+                               path=self._cfg.path,
+                               level=self._cfg.log_lvl,
+                               debug=self._cfg.debug_lvl)
 
         self._prof.prof('init2', uid=self._uid, msg=self._cfg.path)
 
@@ -72,7 +74,6 @@ def __init__(self, sid, reg_addr, owner):
     #
     def _hb_msg_cb(self, topic, msg):
 
-        self._log.debug('==== got hb msg %s', msg)
         hb_msg = HeartbeatMessage(from_dict=msg)
 
         self._heartbeats[hb_msg.uid] = time.time()
@@ -104,7 +105,7 @@ def _wait_startup(self, uids, timeout):
 
             time.sleep(0.25)
 
-        self._log.debug('===== wait for done: %s', ok)
+        self._log.debug('wait for done: %s', ok)
 
 
     # --------------------------------------------------------------------------
@@ -133,6 +134,8 @@ def start_bridges(self, bridges):
             bcfg.sid       = self._cfg.sid
             bcfg.path      = self._cfg.path
             bcfg.reg_addr  = self._cfg.reg_addr
+            bcfg.log_lvl   = self._cfg.log_lvl
+            bcfg.debug_lvl = self._cfg.debug_lvl
             bcfg.heartbeat = self._hb_cfg
 
             self._reg['bridges.%s.cfg' % bname] = bcfg
@@ -180,6 +183,8 @@ def start_components(self, components, cfg = None):
             ccfg.path      = self._cfg.path
             ccfg.reg_addr  = self._cfg.reg_addr
             ccfg.proxy_url = self._cfg.proxy_url
+            ccfg.log_lvl   = self._cfg.log_lvl
+            ccfg.debug_lvl = self._cfg.debug_lvl
             ccfg.heartbeat = self._hb_cfg
 
             if cfg:

From ca5c400da623e2c219b4f6a9aa3cf6186473a7e2 Mon Sep 17 00:00:00 2001
From: Andre Merzky
Date: Thu, 21 Sep 2023 18:23:19 +0200
Subject: [PATCH 164/171] shield against missing mpi

---
 docs/source/tutorials/describing_tasks.ipynb | 666 ++-----------------
 1 file changed, 69 insertions(+), 597 deletions(-)

diff --git a/docs/source/tutorials/describing_tasks.ipynb b/docs/source/tutorials/describing_tasks.ipynb
index 50e4eaf7f8..703d4687a2 100644
--- a/docs/source/tutorials/describing_tasks.ipynb
+++ b/docs/source/tutorials/describing_tasks.ipynb
@@ -27,27 +27,10 @@
    },
    {
     "cell_type": "code",
-    "execution_count": 1,
+    "execution_count": null,
     "id": "c8b8387d",
-    "metadata": {
-     "execution": {
-      "iopub.execute_input": 
"2023-05-18T01:26:46.316432Z", - "iopub.status.busy": "2023-05-18T01:26:46.316106Z", - "iopub.status.idle": "2023-05-18T01:26:46.451071Z", - "shell.execute_reply": "2023-05-18T01:26:46.450250Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'/home/mturilli/ve-notebooks'" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": {}, + "outputs": [], "source": [ "import os\n", "import sys\n", @@ -64,6 +47,31 @@ "display(ve_path)\n" ] }, + { + "cell_type": "markdown", + "id": "c2c6a8bf-3128-42a6-932e-32fdf9f87c55", + "metadata": {}, + "source": [ + "
\n", + " \n", + "__Warning:__ MPI ranks can only be used when the `mpiexec` or `mpirun` commands are available - we test this here to avoid runtime errors\n", + "\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6983867-7111-4cbb-ac66-def4075765e4", + "metadata": {}, + "outputs": [], + "source": [ + "max_ranks = 4\n", + "\n", + "if not ru.which(['mpirun', 'mpiexec']):\n", + " max_ranks = 1" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -77,36 +85,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "7e4566d0", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:26:46.455734Z", - "iopub.status.busy": "2023-05-18T01:26:46.455532Z", - "iopub.status.idle": "2023-05-18T01:27:19.693837Z", - "shell.execute_reply": "2023-05-18T01:27:19.692492Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[94m\u001b[1m\n", - "\u001b[39m\u001b[0m\u001b[94m\u001b[1m================================================================================\n", - "\u001b[39m\u001b[0m\u001b[94m\u001b[1m Tutorial: Describing Tasks (RP version 1.34.0) \n", - "\u001b[39m\u001b[0m\u001b[94m\u001b[1m================================================================================\n", - "\u001b[39m\u001b[0m\u001b[94m\u001b[1m\n", - "\u001b[39m\u001b[0m\u001b[94mnew session: \u001b[39m\u001b[0m[rp.session.three.mturilli.019495.0002]\u001b[39m\u001b[0m\u001b[94m \\\n", - "database : \u001b[39m\u001b[0m[mongodb://rct-tutorial:****@95.217.193.116:27017/rct-tutorial]\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[94mcreate pilot manager\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[94mcreate task manager\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[94msubmit 1 pilot(s)\u001b[39m\u001b[0m\n", - " pilot.0000 local.localhost 32 cores 1 gpus\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m\u001b[94mpilot state: PMGR_ACTIVE\u001b[39m\u001b[0m" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "# configure reporter output \n", "report = ru.Reporter(name='radical.pilot')\n", @@ -155,26 +137,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "1ba782cd", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:19.698223Z", - "iopub.status.busy": "2023-05-18T01:27:19.697879Z", - "iopub.status.idle": "2023-05-18T01:27:19.742619Z", - "shell.execute_reply": "2023-05-18T01:27:19.741824Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "submit: 
\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "# create a minimal executable task\n", "td = rp.TaskDescription({'executable': '/bin/date'})\n", @@ -192,38 +158,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "5f2ea29b", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:19.745416Z", - "iopub.status.busy": "2023-05-18T01:27:19.745184Z", - "iopub.status.idle": "2023-05-18T01:27:25.293949Z", - "shell.execute_reply": "2023-05-18T01:27:25.293306Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "wait : 
\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 1\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "data": { - "text/plain": [ - "['DONE']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "metadata": {}, + "outputs": [], "source": [ "tmgr.wait_tasks()" ] @@ -249,44 +187,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "e7a7d0ac", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:25.296464Z", - "iopub.status.busy": "2023-05-18T01:27:25.296248Z", - "iopub.status.idle": "2023-05-18T01:27:25.315995Z", - "shell.execute_reply": "2023-05-18T01:27:25.315222Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "uid : task.000000\n", - "\u001b[39m\u001b[0mtmgr : tmgr.0000\n", - "\u001b[39m\u001b[0mpilot : pilot.0000\n", - "\u001b[39m\u001b[0mname : \n", - "\u001b[39m\u001b[0mexecutable : /bin/date\n", - "\u001b[39m\u001b[0mstate : DONE\n", - "\u001b[39m\u001b[0mexit_code : 0\n", - "\u001b[39m\u001b[0mstdout : Thu May 18 03:27:23 AM CEST 2023\n", - "\u001b[39m\u001b[0mstderr : \n", - "\u001b[39m\u001b[0mreturn_value : None\n", - "\u001b[39m\u001b[0mexception : None\n", - "\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0mendpoint_fs : file://localhost/\n", - "\u001b[39m\u001b[0mresource_sandbox: file://localhost/home/mturilli/radical.pilot.sandbox\n", - "\u001b[39m\u001b[0msession_sandbox : file://localhost/home/mturilli/radical.pilot.sandbox/rp.session.three.mturilli.019495.0002\n", - "\u001b[39m\u001b[0mpilot_sandbox : file://localhost/home/mturilli/radical.pilot.sandbox/rp.session.three.mturilli.019495.0002/pilot.0000/\n", - "\u001b[39m\u001b[0mtask_sandbox : file://localhost/home/mturilli/radical.pilot.sandbox/rp.session.three.mturilli.019495.0002/pilot.0000/task.000000/\n", - "\u001b[39m\u001b[0mclient_sandbox : /home/mturilli/github/radical.pilot/docs/source/tutorials\n", - "\u001b[39m\u001b[0mmetadata : None\n", - "\u001b[39m\u001b[0m" - ] - } - ], + "metadata": {}, + "outputs": [], 
"source": [ "report.plain('uid : %s\\n' % task.uid)\n", "report.plain('tmgr : %s\\n' % task.tmgr.uid)\n", @@ -328,27 +232,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "32c95d9a", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:25.319462Z", - "iopub.status.busy": "2023-05-18T01:27:25.319172Z", - "iopub.status.idle": "2023-05-18T01:27:25.439540Z", - "shell.execute_reply": "2023-05-18T01:27:25.438841Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "create: \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0msubmit: \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m" 
- ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "import string\n", "letters = string.ascii_lowercase + string.ascii_uppercase\n", @@ -377,38 +264,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "fa13837b", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:25.441944Z", - "iopub.status.busy": "2023-05-18T01:27:25.441799Z", - "iopub.status.idle": "2023-05-18T01:27:31.296128Z", - "shell.execute_reply": "2023-05-18T01:27:31.295235Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "wait : \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 3\n", - "\u001b[39m\u001b[0m\u001b[94m\tFAILED : 49\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "task.000021: ['-u']: Thu May 18 01:27:29 AM UTC 2023\n", - "task.000035: ['-I']: 2023-05-18\n", - "task.000044: ['-R']: Thu, 18 May 2023 03:27:29 +0200\n" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "tmgr.wait_tasks([task.uid for task in tasks])\n", "\n", @@ -428,133 +287,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "a3708cb3", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:31.299523Z", - "iopub.status.busy": "2023-05-18T01:27:31.298749Z", - "iopub.status.idle": "2023-05-18T01:27:31.447222Z", - "shell.execute_reply": "2023-05-18T01:27:31.446657Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "wait : 
\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 3\n", - "\u001b[39m\u001b[0m\u001b[94m\tFAILED : 49\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "task.000001: ['-a']: /bin/date: invalid option -- 'a'\n", - "Try '/bin/date --help' for more information.\n", - "task.000002: ['-b']: /bin/date: invalid option -- 'b'\n", - "Try '/bin/date --help' for more information.\n", - "task.000003: ['-c']: /bin/date: invalid option -- 'c'\n", - "Try '/bin/date --help' for more information.\n", - "task.000004: ['-d']: /bin/date: option requires an argument -- 'd'\n", - "Try '/bin/date --help' for more information.\n", - "task.000005: ['-e']: /bin/date: invalid option -- 'e'\n", - "Try '/bin/date --help' for more information.\n", - "task.000006: ['-f']: /bin/date: option requires an argument -- 'f'\n", - "Try '/bin/date --help' for more information.\n", - "task.000007: ['-g']: /bin/date: invalid option -- 'g'\n", - "Try '/bin/date --help' for more information.\n", - "task.000008: ['-h']: /bin/date: invalid option -- 'h'\n", - "Try '/bin/date --help' for more information.\n", - "task.000009: ['-i']: /bin/date: invalid option -- 'i'\n", - "Try '/bin/date --help' for more information.\n", - "task.000010: ['-j']: /bin/date: invalid option -- 'j'\n", - "Try '/bin/date --help' for more information.\n", - "task.000011: ['-k']: /bin/date: invalid option -- 'k'\n", - "Try '/bin/date --help' for more information.\n", - "task.000012: ['-l']: /bin/date: invalid option -- 'l'\n", - "Try '/bin/date --help' for more information.\n", - "task.000013: ['-m']: /bin/date: invalid option -- 'm'\n", - "Try '/bin/date --help' for more information.\n", - "task.000014: ['-n']: /bin/date: invalid option -- 'n'\n", - "Try '/bin/date --help' for more information.\n", - "task.000015: ['-o']: /bin/date: invalid option -- 'o'\n", - "Try '/bin/date --help' for more information.\n", - "task.000016: ['-p']: /bin/date: invalid option -- 'p'\n", - "Try '/bin/date --help' for more information.\n", - "task.000017: ['-q']: 
/bin/date: invalid option -- 'q'\n", - "Try '/bin/date --help' for more information.\n", - "task.000018: ['-r']: /bin/date: option requires an argument -- 'r'\n", - "Try '/bin/date --help' for more information.\n", - "task.000019: ['-s']: /bin/date: option requires an argument -- 's'\n", - "Try '/bin/date --help' for more information.\n", - "task.000020: ['-t']: /bin/date: invalid option -- 't'\n", - "Try '/bin/date --help' for more information.\n", - "task.000022: ['-v']: /bin/date: invalid option -- 'v'\n", - "Try '/bin/date --help' for more information.\n", - "task.000023: ['-w']: /bin/date: invalid option -- 'w'\n", - "Try '/bin/date --help' for more information.\n", - "task.000024: ['-x']: /bin/date: invalid option -- 'x'\n", - "Try '/bin/date --help' for more information.\n", - "task.000025: ['-y']: /bin/date: invalid option -- 'y'\n", - "Try '/bin/date --help' for more information.\n", - "task.000026: ['-z']: /bin/date: invalid option -- 'z'\n", - "Try '/bin/date --help' for more information.\n", - "task.000027: ['-A']: /bin/date: invalid option -- 'A'\n", - "Try '/bin/date --help' for more information.\n", - "task.000028: ['-B']: /bin/date: invalid option -- 'B'\n", - "Try '/bin/date --help' for more information.\n", - "task.000029: ['-C']: /bin/date: invalid option -- 'C'\n", - "Try '/bin/date --help' for more information.\n", - "task.000030: ['-D']: /bin/date: invalid option -- 'D'\n", - "Try '/bin/date --help' for more information.\n", - "task.000031: ['-E']: /bin/date: invalid option -- 'E'\n", - "Try '/bin/date --help' for more information.\n", - "task.000032: ['-F']: /bin/date: invalid option -- 'F'\n", - "Try '/bin/date --help' for more information.\n", - "task.000033: ['-G']: /bin/date: invalid option -- 'G'\n", - "Try '/bin/date --help' for more information.\n", - "task.000034: ['-H']: /bin/date: invalid option -- 'H'\n", - "Try '/bin/date --help' for more information.\n", - "task.000036: ['-J']: /bin/date: invalid option -- 'J'\n", - "Try '/bin/date --help' for more information.\n", - "task.000037: ['-K']: /bin/date: invalid option -- 'K'\n", - "Try '/bin/date --help' for more information.\n", - "task.000038: ['-L']: /bin/date: invalid option -- 'L'\n", - "Try '/bin/date --help' for more information.\n", - "task.000039: ['-M']: /bin/date: invalid option -- 'M'\n", - "Try '/bin/date --help' for more information.\n", - "task.000040: ['-N']: /bin/date: invalid option -- 'N'\n", - "Try '/bin/date --help' for more information.\n", - "task.000041: ['-O']: /bin/date: invalid option -- 'O'\n", - "Try '/bin/date --help' for more information.\n", - "task.000042: ['-P']: /bin/date: invalid option -- 'P'\n", - "Try '/bin/date --help' for more information.\n", - "task.000043: ['-Q']: /bin/date: invalid option -- 'Q'\n", - "Try '/bin/date --help' for more information.\n", - "task.000045: ['-S']: /bin/date: invalid option -- 'S'\n", - "Try '/bin/date --help' for more information.\n", - "task.000046: ['-T']: /bin/date: invalid option -- 'T'\n", - "Try '/bin/date --help' for more information.\n", - "task.000047: ['-U']: /bin/date: invalid option -- 'U'\n", - "Try '/bin/date --help' for more information.\n", - "task.000048: ['-V']: /bin/date: invalid option -- 'V'\n", - "Try '/bin/date --help' for more information.\n", - "task.000049: ['-W']: /bin/date: invalid option -- 'W'\n", - "Try '/bin/date --help' for more information.\n", - "task.000050: ['-X']: /bin/date: invalid option -- 'X'\n", - "Try '/bin/date --help' for more information.\n", - "task.000051: ['-Y']: /bin/date: invalid option 
-- 'Y'\n", - "Try '/bin/date --help' for more information.\n", - "task.000052: ['-Z']: /bin/date: invalid option -- 'Z'\n", - "Try '/bin/date --help' for more information.\n" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "tmgr.wait_tasks([task.uid for task in tasks])\n", "\n", @@ -606,120 +342,16 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "9047b209", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:31.450159Z", - "iopub.status.busy": "2023-05-18T01:27:31.449859Z", - "iopub.status.idle": "2023-05-18T01:27:39.266337Z", - "shell.execute_reply": "2023-05-18T01:27:39.265474Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - ".\u001b[39m\u001b[0m.\u001b[39m\u001b[0m.\u001b[39m\u001b[0m.\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0msubmit: \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0mwait : 
\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 4\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--- task.000053:\n", - "0 : PID : 1284029\n", - "0 : NODE : three\n", - "0 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "0 : GPUS : 0\n", - "0 : RANK : 0\n", - "0 : THREADS : 1\n", - "0 : SLEEP : 1\n", - "\n", - "--- task.000054:\n", - "0 : PID : 1284080\n", - "0 : NODE : three\n", - "0 : CPUS : 0000000000000000000000000000000100000000000000000000000000000001\n", - "0 : GPUS : 0\n", - "0 : RANK : 0\n", - "0 : THREADS : 2\n", - "0 : SLEEP : 2\n", - "1 : PID : 1284086\n", - "1 : NODE : three\n", - "1 : CPUS : 0000000000000000000000000000001000000000000000000000000000000010\n", - "1 : GPUS : 0\n", - "1 : RANK : 1\n", - "1 : THREADS : 2\n", - "1 : SLEEP : 2\n", - "\n", - "--- task.000055:\n", - "1 : PID : 1284190\n", - "1 : NODE : three\n", - "1 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "1 : GPUS : 0\n", - "1 : RANK : 1\n", - "1 : THREADS : 3\n", - "1 : SLEEP : 3\n", - "2 : PID : 1284205\n", - "2 : NODE : three\n", - "2 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "2 : GPUS : 0\n", - "2 : RANK : 2\n", - "2 : THREADS : 3\n", - "2 : SLEEP : 3\n", - "0 : PID : 1284167\n", - "0 : NODE : three\n", - "0 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "0 : GPUS : 0\n", - "0 : RANK : 0\n", - "0 : THREADS : 3\n", - "0 : SLEEP : 3\n", - "\n", - "--- task.000056:\n", - "3 : PID : 1284214\n", - "3 : NODE : three\n", - "3 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "3 : GPUS : 0\n", - "3 : RANK : 3\n", - "3 : THREADS : 4\n", - "3 : SLEEP : 4\n", - "0 : PID : 1284157\n", - "0 : NODE : three\n", - "0 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "0 : GPUS : 0\n", - "0 : RANK : 0\n", - "0 : THREADS : 4\n", - "0 : SLEEP : 4\n", - "1 : PID : 1284180\n", - "1 : NODE : three\n", - "1 : 
CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "1 : GPUS : 0\n", - "1 : RANK : 1\n", - "1 : THREADS : 4\n", - "1 : SLEEP : 4\n", - "2 : PID : 1284192\n", - "2 : NODE : three\n", - "2 : CPUS : 1111111111111111111111111111111111111111111111111111111111111111\n", - "2 : GPUS : 0\n", - "2 : RANK : 2\n", - "2 : THREADS : 4\n", - "2 : SLEEP : 4\n", - "\n" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "tds = list()\n", "for n in range(4):\n", " tds.append(rp.TaskDescription({'executable' : ve_path + '/bin/radical-pilot-hello.sh',\n", " 'arguments' : [n + 1], \n", - " 'ranks' : (n + 1), \n", + " 'ranks' : min(max_ranks, (n + 1)), \n", " 'cores_per_rank': (n + 1),\n", " 'threading_type': rp.OpenMP}))\n", " report.progress()\n", @@ -753,41 +385,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "0fd464ed", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:39.271133Z", - "iopub.status.busy": "2023-05-18T01:27:39.270540Z", - "iopub.status.idle": "2023-05-18T01:27:45.174660Z", - "shell.execute_reply": "2023-05-18T01:27:45.173670Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "submit: \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0mwait : 
\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 1\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-rw-rw---- 1 mturilli mturilli 24 May 18 03:27 /tmp/output.test.dat\n", - "-rw-rw---- 1 mturilli mturilli 0 May 18 03:27 /tmp/output.test.err\n", - "-rw-rw---- 1 mturilli mturilli 0 May 18 03:27 /tmp/output.test.out\n", - "\n", - " 61 104 3465\n", - "\n" - ] - } - ], + "metadata": {}, + "outputs": [], "source": [ "\n", "td = rp.TaskDescription({'executable' : '/bin/sh',\n", @@ -851,55 +452,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "059fa07e", - "metadata": { - "execution": { - "iopub.execute_input": "2023-05-18T01:27:45.178679Z", - "iopub.status.busy": "2023-05-18T01:27:45.177969Z", - "iopub.status.idle": "2023-05-18T01:27:49.365187Z", - "shell.execute_reply": "2023-05-18T01:27:49.364347Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "submit: 
\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0mwait : \u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m#\u001b[39m\u001b[0m\n", - "\u001b[39m\u001b[0m\u001b[94m\tDONE : 1\n", - "\u001b[39m\u001b[0m\u001b[92m ok\n", - "\u001b[39m\u001b[0m" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[... 
-     dump of the task environment: FOO, BAR, SHELL and the RP_* variables
-     (RP_PILOT_ID, RP_PILOT_SANDBOX, RP_PROF, RP_RANK, RP_RANKS, RP_RESOURCE,
-     RP_SESSION_ID, RP_TASK_ID, RP_TASK_SANDBOX, RP_VENV_PATH, RP_VENV_TYPE) ...]
-   ],
+   "metadata": {},
+   "outputs": [],
    "source": [
     "td = rp.TaskDescription({'executable' : '/bin/sh',\n",
     "                         'arguments' : ['-c', 'printf \"FOO=$FOO\\nBAR=$BAR\\nSHELL=$SHELL\\n\"; env | grep RP_ | sort'],\n",
@@ -936,48 +492,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "id": "15728941",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-05-18T01:27:49.368303Z",
-     "iopub.status.busy": "2023-05-18T01:27:49.367565Z",
-     "iopub.status.idle": "2023-05-18T01:27:54.344539Z",
-     "shell.execute_reply": "2023-05-18T01:27:54.343996Z"
-    }
-   },
-   "outputs": [
-    [... removed stream outputs: ANSI submit/wait progress bars, and pip
-     output for PyYAML 6.0 installed in /home/mturilli/ve-notebooks ...]
-   ],
+   "metadata": {},
+   "outputs": [],
    "source": [
     "td = rp.TaskDescription({'pre_exec'    : ['. %s/bin/activate' % ve_path, \n",
     "                                          'pip install pyyaml'],\n",
@@ -1002,38 +520,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "id": "41467fc2",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-05-18T01:27:54.346733Z",
-     "iopub.status.busy": "2023-05-18T01:27:54.346503Z",
-     "iopub.status.idle": "2023-05-18T01:28:25.421677Z",
-     "shell.execute_reply": "2023-05-18T01:28:25.420783Z"
-    }
-   },
-   "outputs": [
-    [... removed stream outputs: ANSI submit/wait progress bars, and stdout
-     showing .../env/rp_named_env.test_env/bin/python3 and 'psutil 5.9.5' ...]
-   ],
+   "metadata": {},
+   "outputs": [],
    "source": [
     "\n",
     "pilot.prepare_env(env_name='test_env', \n",
@@ -1051,32 +541,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "id": "9c914fc2",
    "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-05-18T01:28:25.424512Z",
-     "iopub.status.busy": "2023-05-18T01:28:25.424291Z",
-     "iopub.status.idle": "2023-05-18T01:28:25.429186Z",
-     "shell.execute_reply": "2023-05-18T01:28:25.428627Z"
-    },
     "pycharm": {
      "name": "#%%\n"
     }
    },
-   "outputs": [
-    [... removed stream output: ANSI 'finalize' banner ...]
-   ],
+   "outputs": [],
    "source": [
     "report.header('finalize')\n",
     "session.close()"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -1099,7 +571,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.11.5"
  },
  "varInspector": {
   "cols": {

From 853d3b4978f8b19c3b2e9714269bf1faccf6110a Mon Sep 17 00:00:00 2001
From: Andre Merzky
Date: Fri, 22 Sep 2023 13:25:26 +0200
Subject: [PATCH 165/171] fixes in control msg handling

---
 src/radical/pilot/agent/scheduler/base.py |  7 ++++++
 src/radical/pilot/raptor/master.py        |  5 ++--
 src/radical/pilot/raptor/worker.py        | 30 ++++++++++++++---------
 3 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/src/radical/pilot/agent/scheduler/base.py b/src/radical/pilot/agent/scheduler/base.py
index 1ce47ba1bd..1416f8407f 100644
--- a/src/radical/pilot/agent/scheduler/base.py
+++ b/src/radical/pilot/agent/scheduler/base.py
@@ -259,6 +259,7 @@ def initialize(self):
         self.register_subscriber(rpc.AGENT_UNSCHEDULE_PUBSUB, self.unschedule_cb)

         # start a process to host the actual scheduling algorithm
+        self._scheduler_process = False
         self._p = mp.Process(target=self._schedule_tasks)
         self._p.daemon = True
         self._p.start()
@@ -321,6 +322,10 @@ def control_cb(self, topic, msg):
         listen on the control channel for raptor queue registration commands
         '''

+        # only the scheduler process listens for control messages
+        if not self._scheduler_process:
+            return
+
         cmd = msg['cmd']
         arg = msg['arg']

@@ -598,6 +603,8 @@ def _schedule_tasks(self):
         tasks.
         '''

+        self._scheduler_process = True
+
         # ZMQ endpoints will not have survived the fork. Specifically the
         # registry client of the component base class will have to reconnect.
# Note that `self._reg` of the base class is a *pointer* to the sesison diff --git a/src/radical/pilot/raptor/master.py b/src/radical/pilot/raptor/master.py index 46a1e29225..0f9c789cc6 100644 --- a/src/radical/pilot/raptor/master.py +++ b/src/radical/pilot/raptor/master.py @@ -94,8 +94,9 @@ def __init__(self, cfg: ru.Config = None): rpu.Component.__init__(self, ccfg, self._session) - self.register_publisher(rpc.STATE_PUBSUB) - self.register_publisher(rpc.CONTROL_PUBSUB) + # we never run `self.start()` which is ok - but it means we miss out on + # some of the component initialization. Call it manually thus + self._initialize() # send new worker tasks and agent input staging / agent scheduler self.register_output(rps.AGENT_STAGING_INPUT_PENDING, diff --git a/src/radical/pilot/raptor/worker.py b/src/radical/pilot/raptor/worker.py index 8638e4ae04..cfb97d020d 100644 --- a/src/radical/pilot/raptor/worker.py +++ b/src/radical/pilot/raptor/worker.py @@ -145,21 +145,29 @@ def _hb_worker(self): # -------------------------------------------------------------------------- # - def _state_cb(self, topic, things): + def _state_cb(self, topic, msgs): - for thing in ru.as_list(things): + for msg in ru.as_list(msgs): - uid = thing['uid'] - state = thing['state'] + cmd = msg['cmd'] + arg = msg['arg'] - if uid == self._raptor_id: + if cmd != 'update': + continue - if state in rps.FINAL + [rps.AGENT_STAGING_OUTPUT_PENDING]: - # master completed - terminate this worker - self._log.info('master %s final: %s - terminate', - uid, state) - self.stop() - return False + for thing in arg: + + uid = thing['uid'] + state = thing['state'] + + if uid == self._raptor_id: + + if state in rps.FINAL + [rps.AGENT_STAGING_OUTPUT_PENDING]: + # master completed - terminate this worker + self._log.info('master %s final: %s - terminate', + uid, state) + self.stop() + return False return True From b49bd379c926d7132b959089be7abfb9ab4159df Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Fri, 22 Sep 2023 14:52:36 +0200 Subject: [PATCH 166/171] fix tests --- src/radical/pilot/agent/agent_0.py | 2 - src/radical/pilot/agent/executing/base.py | 2 - src/radical/pilot/agent/executing/sleep.py | 2 - src/radical/pilot/agent/scheduler/base.py | 4 +- src/radical/pilot/pmgr/launching/base.py | 2 - src/radical/pilot/tmgr/scheduler/base.py | 2 - .../pilot/tmgr/staging_input/default.py | 2 - src/radical/pilot/utils/component.py | 8 ++- tests/unit_tests/test_agent_0/test_agent_0.py | 71 ++++++------------- tests/unit_tests/test_executing/test_popen.py | 4 +- tests/unit_tests/test_raptor/test_master.py | 3 + tests/unit_tests/test_scheduler/test_base.py | 4 ++ .../unit_tests/test_tmgr/test_tmgr_staging.py | 1 + 13 files changed, 39 insertions(+), 68 deletions(-) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index df11cdd5c4..fc15f2f3ff 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -558,8 +558,6 @@ def control_cb(self, topic, msg): elif cmd == 'service_up': return self._ctrl_service_up(msg) - return True - # -------------------------------------------------------------------------- # diff --git a/src/radical/pilot/agent/executing/base.py b/src/radical/pilot/agent/executing/base.py index 263c2dd3ef..5764d61d9c 100644 --- a/src/radical/pilot/agent/executing/base.py +++ b/src/radical/pilot/agent/executing/base.py @@ -137,8 +137,6 @@ def control_cb(self, topic, msg): for tid in arg['uids']: self.cancel_task(tid) - return True - # 
-------------------------------------------------------------------------- # diff --git a/src/radical/pilot/agent/executing/sleep.py b/src/radical/pilot/agent/executing/sleep.py index 5940ade971..798e6351fd 100644 --- a/src/radical/pilot/agent/executing/sleep.py +++ b/src/radical/pilot/agent/executing/sleep.py @@ -163,8 +163,6 @@ def control_cb(self, topic, msg): # FIXME: clarify how to cancel tasks pass - return True - # ------------------------------------------------------------------------------ diff --git a/src/radical/pilot/agent/scheduler/base.py b/src/radical/pilot/agent/scheduler/base.py index 1416f8407f..5faef6f2db 100644 --- a/src/radical/pilot/agent/scheduler/base.py +++ b/src/radical/pilot/agent/scheduler/base.py @@ -321,6 +321,7 @@ def control_cb(self, topic, msg): ''' listen on the control channel for raptor queue registration commands ''' + print('----- b', msg) # only the scheduler process listens for control messages if not self._scheduler_process: @@ -399,6 +400,7 @@ def control_cb(self, topic, msg): to_cancel = list() with self._lock: for uid in uids: + print('---------- cancel', uid) if uid in self._waitpool: to_cancel.append(self._waitpool[uid]) del self._waitpool[uid] @@ -420,8 +422,6 @@ def control_cb(self, topic, msg): else: self._log.debug('command ignored: [%s]', cmd) - return True - # -------------------------------------------------------------------------- # diff --git a/src/radical/pilot/pmgr/launching/base.py b/src/radical/pilot/pmgr/launching/base.py index 525cb6874e..96ad355c38 100644 --- a/src/radical/pilot/pmgr/launching/base.py +++ b/src/radical/pilot/pmgr/launching/base.py @@ -239,8 +239,6 @@ def control_cb(self, topic, msg): self._kill_pilots(pids) - return True - # -------------------------------------------------------------------------- # diff --git a/src/radical/pilot/tmgr/scheduler/base.py b/src/radical/pilot/tmgr/scheduler/base.py index 4403fcdbc8..42794711a4 100644 --- a/src/radical/pilot/tmgr/scheduler/base.py +++ b/src/radical/pilot/tmgr/scheduler/base.py @@ -317,8 +317,6 @@ def control_cb(self, topic, msg): # arg={'uids' : to_cancel[pid]}, # pids=pid) - return True - # -------------------------------------------------------------------------- # diff --git a/src/radical/pilot/tmgr/staging_input/default.py b/src/radical/pilot/tmgr/staging_input/default.py index fa2783f412..e4abdce3dc 100644 --- a/src/radical/pilot/tmgr/staging_input/default.py +++ b/src/radical/pilot/tmgr/staging_input/default.py @@ -115,8 +115,6 @@ def control_cb(self, topic, msg): self.publish(rpc.CONTROL_PUBSUB, msg={'cmd': 'pilot_register_ok', 'arg': {'pid': pid}}) - return True - # -------------------------------------------------------------------------- # diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index 1d2f2fc47e..2fce57f3d4 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -344,7 +344,7 @@ def _control_cb(self, topic, msg): return except: - # coult not be handled - fall through to legacy handlers + # could not be handled - fall through to legacy handlers pass # handle any other message types @@ -365,6 +365,10 @@ def _control_cb(self, topic, msg): with self._cancel_lock: self._cancel_list += uids + # FIXME RPC: scheduler handles cancelation itself + if 'AgentSchedulingComponent' in repr(self): + self.control_cb(topic, msg) + elif cmd == 'terminate': self._log.info('got termination command') self.stop() @@ -373,8 +377,6 @@ def _control_cb(self, topic, msg): 
self._log.debug_1('command handled by implementation: %s', cmd) self.control_cb(topic, msg) - return True - # -------------------------------------------------------------------------- # diff --git a/tests/unit_tests/test_agent_0/test_agent_0.py b/tests/unit_tests/test_agent_0/test_agent_0.py index 02fd8fba6a..16e46f171f 100755 --- a/tests/unit_tests/test_agent_0/test_agent_0.py +++ b/tests/unit_tests/test_agent_0/test_agent_0.py @@ -9,6 +9,8 @@ from unittest import mock, TestCase +from radical.pilot.messages import RPCRequestMessage, RPCResultMessage + import radical.utils as ru import radical.pilot as rp @@ -47,6 +49,8 @@ def setUpClass(cls, *args, **kwargs) -> None: def tearDownClass(cls) -> None: for p in cls._cleanup_files: + if p is None: + continue for f in glob.glob(p): if os.path.isdir(f): try: @@ -65,10 +69,12 @@ def test_check_control_cb(self, mocked_init): def _publish_effect(publish_type, cmd): nonlocal global_control + import pprint + print('=============== pub', pprint.pformat(cmd)) global_control.append((publish_type, cmd)) - def _prepenv_effect(env_id, spec): - return env_id, spec + def _prepenv_effect(env_name, env_spec): + return env_name, env_spec agent_cmp = Agent_0() @@ -78,62 +84,29 @@ def _prepenv_effect(env_id, spec): agent_cmp.publish = mock.MagicMock(side_effect=_publish_effect) agent_cmp._prepare_env = mock.MagicMock(side_effect=_prepenv_effect) + agent_cmp._rpc_handlers = {'prepare_env': agent_cmp._prepare_env} + msg = {'cmd': 'test', 'arg': {'uid': 'rpc.0000', 'rpc': 'bye'} } - self.assertTrue(agent_cmp._control_cb(None, msg)) + self.assertIsNone(agent_cmp._control_cb(None, msg)) self.assertEqual(global_control, []) - msg = {'cmd': 'rpc_req', - 'arg': {'uid': 'rpc.0001', - 'rpc': 'bye'} - } - self.assertTrue(agent_cmp._control_cb(None, msg)) + msg = RPCRequestMessage({'cmd': 'bye', 'kwargs': {'uid': 'rpc.0001'}}) + self.assertIsNone(agent_cmp._control_cb(None, msg)) self.assertEqual(global_control, []) - msg = {'cmd': 'rpc_req', - 'arg': {'uid': 'rpc.0002', - 'rpc': 'hello'} - } - self.assertIsNone(agent_cmp._control_cb(None, msg)) - self.assertEqual(1, len(global_control)) - # format for the raised exception might be a little different based on - # python version, e.g., py36: KeyError('arg',) | py37: KeyError('arg') - self.assertTrue(global_control[0][1] - ['arg']['err'].startswith("KeyError('arg'")) - self.assertEqual('rpc.0002', global_control[0][1]['arg']['uid']) - - msg = {'cmd': 'rpc_req', - 'arg': {'uid': 'rpc.0003', - 'rpc': 'hello', - 'arg': ['World']} - } - self.assertIsNone(agent_cmp._control_cb(None, msg)) - self.assertEqual(global_control[1], ('control_pubsub', - {'cmd': 'rpc_res', - 'arg': {'uid': 'rpc.0003', - 'err': None, - 'out': 'hello World', - 'ret': 0} - })) - - msg = {'cmd': 'rpc_req', - 'arg': {'uid': 'rpc.0004', - 'rpc': 'prepare_env', - 'arg': {'env_name': 'radical', - 'env_spec': 'spec'} - } - } + msg = RPCRequestMessage({'cmd' : 'prepare_env', + 'uid' : 'rpc.0004', + 'kwargs': {'env_name': 'radical', + 'env_spec': 'spec'}}) self.assertIsNone(agent_cmp._control_cb(None, msg)) - self.assertEqual(global_control[2], ('control_pubsub', - {'cmd': 'rpc_res', - 'arg': {'uid': 'rpc.0004', - 'err': None, - 'out': ('radical', - 'spec'), - 'ret': 0} - })) + print('====', global_control, '====') + self.assertEqual(global_control[0], + ('control_pubsub', + RPCResultMessage({'uid': 'rpc.0004', + 'val': ('radical', 'spec')}))) # -------------------------------------------------------------------------- diff --git 
a/tests/unit_tests/test_executing/test_popen.py b/tests/unit_tests/test_executing/test_popen.py index b18d59d0af..d17a967fce 100755 --- a/tests/unit_tests/test_executing/test_popen.py +++ b/tests/unit_tests/test_executing/test_popen.py @@ -49,10 +49,10 @@ def test_control_cb(self, mocked_logger, mocked_init): pex._watch_queue = queue.Queue() msg = {'cmd': '', 'arg': {'uids': ['task.0000', 'task.0001']}} - self.assertTrue(pex.control_cb(topic=None, msg=msg)) + self.assertIsNone(pex.control_cb(topic=None, msg=msg)) msg['cmd'] = 'cancel_tasks' - self.assertTrue(pex.control_cb(topic=None, msg=msg)) + self.assertIsNone(pex.control_cb(topic=None, msg=msg)) for uid in msg['arg']['uids']: mode, tid = pex._watch_queue.get() self.assertEqual(mode, pex.TO_CANCEL) diff --git a/tests/unit_tests/test_raptor/test_master.py b/tests/unit_tests/test_raptor/test_master.py index d1d133ceb2..81b8fbb103 100644 --- a/tests/unit_tests/test_raptor/test_master.py +++ b/tests/unit_tests/test_raptor/test_master.py @@ -27,6 +27,7 @@ def _init_primary_side_effect(self): self._prof = mock.MagicMock() self._rep = mock.MagicMock() self._reg = mock.MagicMock() + self._uid = 'session.001' # -------------------------------------------------------------------------- @@ -46,6 +47,8 @@ def setUpClass(cls, *args, **kwargs) -> None: def tearDownClass(cls) -> None: for p in cls._cleanup_files: + if p is None: + continue for f in glob.glob(p): if os.path.isdir(f): try: diff --git a/tests/unit_tests/test_scheduler/test_base.py b/tests/unit_tests/test_scheduler/test_base.py index be0f4d7ada..4eaec568bd 100755 --- a/tests/unit_tests/test_scheduler/test_base.py +++ b/tests/unit_tests/test_scheduler/test_base.py @@ -53,6 +53,7 @@ def test_initialize(self, mocked_env_eval, mocked_hostname, mocked_mp, sched.register_subscriber = mock.Mock() sched.nodes = [] sched._partitions = {} + sched._scheduler_process = False sched._session = mock.Mock() @@ -164,9 +165,12 @@ def _log_debug(*args): sched = AgentSchedulingComponent(cfg=None, session=None) sched._log = mock.Mock() sched._log.debug.side_effect = _log_debug + sched._scheduler_process = True sched._lock = mt.Lock() sched._raptor_lock = mt.Lock() + sched._cancel_lock = mt.RLock() + sched._cancel_list = list() task0000 = {} sched._waitpool = {'task.0000': task0000} diff --git a/tests/unit_tests/test_tmgr/test_tmgr_staging.py b/tests/unit_tests/test_tmgr/test_tmgr_staging.py index 5d89cb3522..a5c60e2f58 100644 --- a/tests/unit_tests/test_tmgr/test_tmgr_staging.py +++ b/tests/unit_tests/test_tmgr/test_tmgr_staging.py @@ -53,6 +53,7 @@ def test_si_create(self, mocked_component_init): def test_si_work(self, mocked_si_init): tmgr_si = StageInDefault(cfg={}, session=None) + tmgr_si._log = mock.Mock() tmgr_si._session_sbox = '/tmp' def _mocked_advance(things, state, publish, push, qname=None): From d1ef2b1dd12699fbdec5e621865c9e9a2b481ab7 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Fri, 22 Sep 2023 20:34:02 +0200 Subject: [PATCH 167/171] addr rpc --- VERSION | 2 +- examples/03_multiple_pilots.py | 4 ++++ src/radical/pilot/agent/agent_0.py | 5 ++--- src/radical/pilot/pilot.py | 2 +- src/radical/pilot/utils/component.py | 13 ++++++++++--- 5 files changed, 18 insertions(+), 8 deletions(-) diff --git a/VERSION b/VERSION index bf50e910e6..32b7211cb6 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.37.0 +1.40.0 diff --git a/examples/03_multiple_pilots.py b/examples/03_multiple_pilots.py index 3c7b16cb54..37db17df60 100755 --- a/examples/03_multiple_pilots.py +++ b/examples/03_multiple_pilots.py @@ 
-71,6 +71,10 @@ # Launch the pilots. pilots = pmgr.submit_pilots(pdescs) + for pilot in pilots: + pilot.prepare_env('numpy_env', {'type' : 'virtualenv', + 'setup': ['numpy']}) + for gen in range(1): diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index fc15f2f3ff..4acb774aa4 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -202,7 +202,8 @@ def initialize(self): self.register_output(rps.TMGR_STAGING_OUTPUT_PENDING, rpc.PROXY_TASK_QUEUE) - self.register_rpc_handler('prepare_env', self._prepare_env) + self.register_rpc_handler('prepare_env', self._prepare_env, + addr=self._pid) # before we run any tasks, prepare a named_env `rp` for tasks which use # the pilot's own environment, such as raptors @@ -213,8 +214,6 @@ def initialize(self): 'export PATH=%s' % os.environ.get('PATH', '')] } - - self.rpc('prepare_env', env_name='rp', env_spec=env_spec) # start any services if they are requested diff --git a/src/radical/pilot/pilot.py b/src/radical/pilot/pilot.py index c0f34566ca..13f9561d32 100644 --- a/src/radical/pilot/pilot.py +++ b/src/radical/pilot/pilot.py @@ -770,7 +770,7 @@ def rpc(self, cmd, *args, **kwargs): rpc_id = ru.generate_id('%s.rpc' % self._uid) rpc_req = RPCRequestMessage(uid=rpc_id, cmd=cmd, args=args, - kwargs=kwargs) + kwargs=kwargs, addr=self.uid) self._rpc_reqs[rpc_id] = { 'req': rpc_req, diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index 2fce57f3d4..ce3e662337 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -422,6 +422,13 @@ def _handle_rpc_msg(self, msg): self._log.debug('no rpc handler for [%s])', msg.cmd) return + rpc_handler, addr = self._rpc_handlers[msg.cmd] + + if msg.addr and msg.addr != addr: + self._log.debug('ignore rpc handler for [%s] [%s])', msg, addr) + return + + try: self._log.debug('rpc handler for %s: %s', msg.cmd, self._rpc_handlers[msg.cmd]) @@ -429,7 +436,7 @@ def _handle_rpc_msg(self, msg): sys.stdout = strout = io.StringIO() sys.stderr = strerr = io.StringIO() - val = self._rpc_handlers[msg.cmd](*msg.args, **msg.kwargs) + val = rpc_handler(*msg.args, **msg.kwargs) out = strout.getvalue() err = strerr.getvalue() @@ -453,9 +460,9 @@ def _handle_rpc_msg(self, msg): # -------------------------------------------------------------------------- # - def register_rpc_handler(self, cmd, handler): + def register_rpc_handler(self, cmd, handler, addr=None): - self._rpc_handlers[cmd] = handler + self._rpc_handlers[cmd] = [handler, addr] # -------------------------------------------------------------------------- From 48d2df49687d186bc5fc7ffae0345907eb67af5e Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Fri, 22 Sep 2023 23:31:04 +0200 Subject: [PATCH 168/171] keep pilot RPCs local --- src/radical/pilot/agent/agent_0.py | 3 ++- src/radical/pilot/utils/component.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index 4acb774aa4..cbd1a0b38a 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -214,7 +214,8 @@ def initialize(self): 'export PATH=%s' % os.environ.get('PATH', '')] } - self.rpc('prepare_env', env_name='rp', env_spec=env_spec) + self.rpc('prepare_env', env_name='rp', env_spec=env_spec, + addr=self._pid) # start any services if they are requested self._start_services() diff --git a/src/radical/pilot/utils/component.py 
b/src/radical/pilot/utils/component.py index ce3e662337..dd434a7405 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -467,7 +467,7 @@ def register_rpc_handler(self, cmd, handler, addr=None): # -------------------------------------------------------------------------- # - def rpc(self, cmd, *args, **kwargs): + def rpc(self, cmd, addr=None, *args, **kwargs): '''Remote procedure call. Send am RPC command and arguments to the control pubsub and wait for the @@ -479,7 +479,8 @@ def rpc(self, cmd, *args, **kwargs): rpc_id = ru.generate_id('%s.rpc' % self._uid) rpc_req = RPCRequestMessage(uid=rpc_id, cmd=cmd, - args=args, kwargs=kwargs) + args=args, kwargs=kwargs, + addr=addr) self._rpc_reqs[rpc_id] = { 'req': rpc_req, From dd8420ecc793bbced36b3656f98877bf48cfe163 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sat, 23 Sep 2023 00:47:37 +0200 Subject: [PATCH 169/171] fix tests --- src/radical/pilot/agent/agent_0.py | 2 +- src/radical/pilot/utils/component.py | 11 +++++------ tests/unit_tests/test_agent_0/test_agent_0.py | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/radical/pilot/agent/agent_0.py b/src/radical/pilot/agent/agent_0.py index cbd1a0b38a..c2d2fa84d6 100644 --- a/src/radical/pilot/agent/agent_0.py +++ b/src/radical/pilot/agent/agent_0.py @@ -543,7 +543,7 @@ def control_cb(self, topic, msg): self._log.debug_1('control msg %s: %s', topic, msg) cmd = msg['cmd'] - arg = msg['arg'] + arg = msg.get('arg') self._log.debug('pilot command: %s: %s', cmd, arg) self._prof.prof('cmd', msg="%s : %s" % (cmd, arg), uid=self._pid) diff --git a/src/radical/pilot/utils/component.py b/src/radical/pilot/utils/component.py index dd434a7405..23e593ae07 100644 --- a/src/radical/pilot/utils/component.py +++ b/src/radical/pilot/utils/component.py @@ -351,7 +351,7 @@ def _control_cb(self, topic, msg): self._log.debug_5('command incoming: %s', msg) cmd = msg['cmd'] - arg = msg['arg'] + arg = msg.get('arg') if cmd == 'cancel_tasks': @@ -419,7 +419,7 @@ def _handle_rpc_msg(self, msg): if msg.cmd not in self._rpc_handlers: # this RPC message is *silently* ignored - self._log.debug('no rpc handler for [%s])', msg.cmd) + self._log.debug('no rpc handler for [%s]', msg.cmd) return rpc_handler, addr = self._rpc_handlers[msg.cmd] @@ -428,7 +428,6 @@ def _handle_rpc_msg(self, msg): self._log.debug('ignore rpc handler for [%s] [%s])', msg, addr) return - try: self._log.debug('rpc handler for %s: %s', msg.cmd, self._rpc_handlers[msg.cmd]) @@ -452,10 +451,10 @@ def _handle_rpc_msg(self, msg): sys.stdout = bakout sys.stderr = bakerr - rpc_rep = RPCResultMessage(rpc_req=msg, val=val, out=out, err=err, exc=exc) - self._log.debug_3('rpc reply: %s', rpc_rep) + rpc_res = RPCResultMessage(rpc_req=msg, val=val, out=out, err=err, exc=exc) + self._log.debug_3('rpc reply: %s', rpc_res) - self.publish(rpc.CONTROL_PUBSUB, rpc_rep) + self.publish(rpc.CONTROL_PUBSUB, rpc_res) # -------------------------------------------------------------------------- diff --git a/tests/unit_tests/test_agent_0/test_agent_0.py b/tests/unit_tests/test_agent_0/test_agent_0.py index 16e46f171f..8e35b1a7a3 100755 --- a/tests/unit_tests/test_agent_0/test_agent_0.py +++ b/tests/unit_tests/test_agent_0/test_agent_0.py @@ -84,7 +84,7 @@ def _prepenv_effect(env_name, env_spec): agent_cmp.publish = mock.MagicMock(side_effect=_publish_effect) agent_cmp._prepare_env = mock.MagicMock(side_effect=_prepenv_effect) - agent_cmp._rpc_handlers = {'prepare_env': agent_cmp._prepare_env} + 
agent_cmp._rpc_handlers = {'prepare_env': (agent_cmp._prepare_env, None)} msg = {'cmd': 'test', 'arg': {'uid': 'rpc.0000', From 6db5982f24abee211cb3706a1e419a870297bae5 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sat, 23 Sep 2023 01:10:41 +0200 Subject: [PATCH 170/171] resolve conflict --- docs/source/tutorials/describing_tasks.ipynb | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/source/tutorials/describing_tasks.ipynb b/docs/source/tutorials/describing_tasks.ipynb index fe6042b1db..3b1134ddd3 100644 --- a/docs/source/tutorials/describing_tasks.ipynb +++ b/docs/source/tutorials/describing_tasks.ipynb @@ -558,11 +558,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", -<<<<<<< HEAD "version": "3.9.13" -======= - "version": "3.11.5" ->>>>>>> devel_nodb_2 }, "varInspector": { "cols": { From 5a517f1981565214c154085f68649f1119d23e40 Mon Sep 17 00:00:00 2001 From: Mikhail Titov Date: Wed, 27 Sep 2023 00:10:54 -0400 Subject: [PATCH 171/171] removed excessive line (in `TaskManager.add_pilots` method) --- src/radical/pilot/task_manager.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/radical/pilot/task_manager.py b/src/radical/pilot/task_manager.py index a28406d50f..c7e65f0ed4 100644 --- a/src/radical/pilot/task_manager.py +++ b/src/radical/pilot/task_manager.py @@ -557,7 +557,6 @@ def add_pilots(self, pilots): pilot_docs.append(pilot_dict) # publish to the command channel for the scheduler to pick up - pilot_docs = [pilot.as_dict() for pilot in pilots] self.publish(rpc.CONTROL_PUBSUB, {'cmd' : 'add_pilots', 'arg' : {'pilots': pilot_docs, 'tmgr' : self.uid}})
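
The RPC rework in patches 165 through 169 above converges on a single dispatch
pattern: a component registers an RPC handler under a command name, optionally
scoped to an address (for example the pilot uid, as Agent_0 does for
'prepare_env'); an incoming RPCRequestMessage is served only if its `addr`
matches that scope, and the RPCResultMessage is matched back to the waiting
caller by request uid. The self-contained Python sketch below illustrates that
dispatch logic under simplifying assumptions: `RPCRequest`, `RPCResult`,
`Component` and the direct in-process call are hypothetical stand-ins for the
real message classes and the ZMQ control pubsub, not the radical.pilot
implementation.

    from dataclasses import dataclass, field
    from typing import Any, Callable, Dict, Optional, Tuple

    @dataclass
    class RPCRequest:                      # stand-in for RPCRequestMessage
        uid:    str                        # unique request id, e.g. 'rpc.0001'
        cmd:    str                        # handler name, e.g. 'prepare_env'
        addr:   Optional[str]  = None      # target component; None matches any
        args:   Tuple          = ()
        kwargs: Dict[str, Any] = field(default_factory=dict)

    @dataclass
    class RPCResult:                       # stand-in for RPCResultMessage
        uid: str                           # copied from the request for matching
        val: Any           = None
        exc: Optional[str] = None          # repr of a raised exception, if any

    class Component:

        def __init__(self, uid: str) -> None:
            self._uid = uid
            self._rpc_handlers: Dict[str, Tuple[Callable, Optional[str]]] = {}

        def register_rpc_handler(self, cmd: str, handler: Callable,
                                 addr: Optional[str] = None) -> None:
            # `addr` scopes the handler, mirroring patch 167: requests which
            # carry a different target address are ignored by this component
            self._rpc_handlers[cmd] = (handler, addr)

        def handle_rpc_msg(self, msg: RPCRequest) -> Optional[RPCResult]:
            if msg.cmd not in self._rpc_handlers:
                return None                # unknown command: silently ignored
            handler, addr = self._rpc_handlers[msg.cmd]
            if msg.addr and msg.addr != addr:
                return None                # addressed to a different component
            try:
                return RPCResult(uid=msg.uid,
                                 val=handler(*msg.args, **msg.kwargs))
            except Exception as e:
                return RPCResult(uid=msg.uid, exc=repr(e))

    # only the component whose registered address matches the request replies
    agent = Component('agent_0')
    agent.register_rpc_handler('prepare_env',
                               lambda env_name, env_spec: (env_name, env_spec),
                               addr='pilot.0000')

    req = RPCRequest(uid='rpc.0001', cmd='prepare_env', addr='pilot.0000',
                     kwargs={'env_name': 'rp', 'env_spec': {'type': 'venv'}})
    res = agent.handle_rpc_msg(req)
    assert res is not None and res.val == ('rp', {'type': 'venv'})

    # a request addressed to another pilot is dropped without a reply
    assert agent.handle_rpc_msg(RPCRequest(uid='rpc.0002', cmd='prepare_env',
                                           addr='pilot.0001')) is None

Matching on the result uid rather than on the command name is what lets
`Pilot.rpc()` in patch 167 block on exactly its own reply while unrelated
results pass by on the shared control channel.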