Skip to content

Commit

Permalink
response to comments
Browse files Browse the repository at this point in the history
  • Loading branch information
andre-merzky committed Aug 4, 2023
1 parent 50251d0 commit 97761d9
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 26 deletions.
4 changes: 3 additions & 1 deletion src/radical/pilot/agent/agent_0.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,9 @@ def _start_sub_agents(self):
if target == 'local':

# start agent locally
cmdline = '/bin/sh -l %s/bootstrap_2.sh %s' % (self._pwd, sa)
bs_name = '%s/bootstrap_2.sh' % (self._pwd)
args = ' '.join([self._sid, self.cfg.reg_addr, sa])
cmdline = '/bin/sh -l %s/%s %s' % (self._pwd, bs_name, args)


else: # target == 'node':
Expand Down
8 changes: 7 additions & 1 deletion src/radical/pilot/agent/executing/popen.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,12 @@ def _get_rp_env(self, task):
if sbox.startswith(self._pwd):
sbox = '$RP_PILOT_SANDBOX%s' % sbox[len(self._pwd):]

gpr = td['gpus_per_rank']
if int(gpr) == gpr:
gpr = '%d' % gpr
else:
gpr = '%f' % gpr

ret = '\n'
ret += 'export RP_TASK_ID="%s"\n' % tid
ret += 'export RP_TASK_NAME="%s"\n' % name
Expand All @@ -560,7 +566,7 @@ def _get_rp_env(self, task):
ret += 'export RP_TASK_SANDBOX="%s"\n' % sbox
ret += 'export RP_REGISTRY_ADDRESS="%s"\n' % self._session.reg_addr
ret += 'export RP_CORES_PER_RANK=%d\n' % td['cores_per_rank']
ret += 'export RP_GPUS_PER_RANK=%d\n' % td['gpus_per_rank']
ret += 'export RP_GPUS_PER_RANK=%s\n' % gpr

# FIXME AM
# ret += 'export RP_LFS="%s"\n' % self.lfs
Expand Down
9 changes: 0 additions & 9 deletions src/radical/pilot/configs/agent_debug_sa.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,6 @@
"stall_hwm" : 1,
"bulk_size" : 1024},

"funcs_wrk_queue" : { "kind" : "queue",
"log_level" : "error",
"stall_hwm" : 1,
"bulk_size" : 0},
"funcs_res_queue" : { "kind" : "queue",
"log_level" : "error",
"stall_hwm" : 1,
"bulk_size" : 0},

"agent_unschedule_pubsub" : { "kind" : "pubsub",
"log_level" : "error",
"stall_hwm" : 1,
Expand Down
25 changes: 11 additions & 14 deletions src/radical/pilot/configs/agent_default.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,21 @@
# stall_hwm and bulk_size is 1 (no stalling, no bulking).
#
"bridges" : {
"agent_staging_input_queue" : {"kind": "queue", "log_lvl":"debug"},
"agent_scheduling_queue" : {"kind": "queue", "log_lvl":"debug"},
"agent_executing_queue" : {"kind": "queue", "log_lvl":"debug"},
"agent_staging_output_queue" : {"kind": "queue", "log_lvl":"debug"},
"agent_collecting_queue" : {"kind": "queue", "log_lvl":"debug"},
"agent_staging_input_queue" : {"kind": "queue", "log_lvl":"error"},
"agent_scheduling_queue" : {"kind": "queue", "log_lvl":"error"},
"agent_executing_queue" : {"kind": "queue", "log_lvl":"error"},
"agent_staging_output_queue" : {"kind": "queue", "log_lvl":"error"},
"agent_collecting_queue" : {"kind": "queue", "log_lvl":"error"},

"funcs_req_queue" : {"kind": "queue", "log_lvl":"debug"},
"funcs_res_queue" : {"kind": "queue", "log_lvl":"debug"},
"raptor_scheduling_queue" : {"kind": "queue", "log_lvl":"error"},

"raptor_scheduling_queue" : {"kind": "queue", "log_lvl":"debug"},
"agent_unschedule_pubsub" : {"kind": "pubsub", "log_lvl":"error"},
"agent_schedule_pubsub" : {"kind": "pubsub", "log_lvl":"error"},

"agent_unschedule_pubsub" : {"kind": "pubsub", "log_lvl":"debug"},
"agent_schedule_pubsub" : {"kind": "pubsub", "log_lvl":"debug"},
"control_pubsub" : {"kind": "pubsub", "log_lvl":"error"},
"state_pubsub" : {"kind": "pubsub", "log_lvl":"error"}

"control_pubsub" : {"kind": "pubsub", "log_lvl":"debug"},
"state_pubsub" : {"kind": "pubsub", "log_lvl":"debug"}

# "log_pubsub" : {"kind": "pubsub", "log_lvl":"debug"}
# "log_pubsub" : {"kind": "pubsub", "log_lvl":"error"}
},

"components" : {
Expand Down
2 changes: 1 addition & 1 deletion src/radical/pilot/tmgr/staging_input/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def work(self, tasks):

task_sboxes = sboxes[pid]

if False or len(task_sboxes) >= self._mkdir_threshold:
if len(task_sboxes) >= self._mkdir_threshold:
self._log.debug('tar %d sboxes', len(task_sboxes))

session_sbox = self._session._get_session_sandbox(pilot)
Expand Down

0 comments on commit 97761d9

Please sign in to comment.