Skip to content

Commit

Permalink
Merge pull request #2992 from radical-cybertools/fix/raptor_summit
Browse files: browse the repository at this point in the history
added pre-defined `pre_exec` for Summit
  • Loading branch information
mtitov authored Jul 25, 2023
2 parents f41b5d6 + 2d31008 commit 49f89f4
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 18 deletions.
6 changes: 2 additions & 4 deletions examples/misc/raptor.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
# extra resources for the rp agent (optional)
"nodes_agent" : 0,

# pilot runtime in min
"runtime" : 120,

# task configuration
"cores_per_task" : 2,
"sleep" : 3,
Expand All @@ -28,7 +25,8 @@

"pilot_descr": {
"resource" : "local.localhost",
"runtime" : 60,
# pilot runtime in min
"runtime" : 20,
"access_schema": "local"
},

Expand Down
7 changes: 2 additions & 5 deletions examples/misc/raptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,6 @@ def task_state_cb(task, state):
pd.cores += nodes_rp * cores_per_node
pd.gpus += nodes_rp * gpus_per_node

pd.runtime = cfg.runtime

pmgr = rp.PilotManager(session=session)
tmgr = rp.TaskManager(session=session)
tmgr.register_callback(task_state_cb)
Expand All @@ -143,7 +141,6 @@ def task_state_cb(task, state):
report.info('Call pilot.prepare_env()... ')
pilot.prepare_env(env_name='ve_raptor',
env_spec={'type' : 'venv',
'path' : '/tmp/ve3',
'setup': [rp.sdist_path,
ru.sdist_path,
'mpi4py']})
Expand Down Expand Up @@ -190,7 +187,7 @@ def task_state_cb(task, state):
states = tmgr.wait_tasks(
uids=[t.uid for t in task],
state=rp.FINAL + [rp.AGENT_EXECUTING],
timeout=60
timeout=300
)
logger.info('Master states: %s', str(states))

Expand Down Expand Up @@ -307,7 +304,7 @@ def task_state_cb(task, state):
tasks = tmgr.submit_tasks(tds)

logger.info('Wait for tasks %s', [t.uid for t in tds])
tmgr.wait_tasks(uids=[t.uid for t in tasks], timeout=300)
tmgr.wait_tasks(uids=[t.uid for t in tasks], timeout=900)

for task in tasks:
report.info('id: %s [%s]:\n out: %s\n ret: %s\n'
Expand Down
3 changes: 3 additions & 0 deletions src/radical/pilot/agent/executing/popen.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,9 @@ def _extend_pre_exec(self, td, ranks=None):
rank_id += 1
td['pre_exec'].append(rank_env)

# pre-defined `pre_exec` per platform configuration
td['pre_exec'].extend(ru.as_list(self._cfg.get('task_pre_exec')))


# --------------------------------------------------------------------------
#
Expand Down
17 changes: 8 additions & 9 deletions src/radical/pilot/configs/resource_ornl.json
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,7 @@
"ulimit -u 65536"
],
"default_remote_workdir" : "$MEMBERWORK/%(pd.project)s",
"python_dist" : "default",
"virtenv_mode" : "create",
"rp_version" : "local",
"virtenv_mode" : "local",
"cores_per_node" : 42,
"gpus_per_node" : 6,
"lfs_path_per_node" : "/tmp",
Expand All @@ -197,7 +195,8 @@
"options" : ["gpumps", "nvme"],
"blocked_cores" : [],
"blocked_gpus" : []
}
},
"task_pre_exec" : ["export LD_LIBRARY_PATH=/sw/summit/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-9.1.0/spectrum-mpi-10.4.0.3-20210112-6jbupg3thjwhsabgevk6xmwhd2bbyxdc/container/../lib/pami_port:${LD_LIBRARY_PATH}"]
},

"summit_jsrun": {
Expand Down Expand Up @@ -233,9 +232,7 @@
"ulimit -u 65536"
],
"default_remote_workdir" : "$MEMBERWORK/%(pd.project)s",
"python_dist" : "default",
"virtenv_mode" : "create",
"rp_version" : "local",
"virtenv_mode" : "local",
"cores_per_node" : 42,
"gpus_per_node" : 6,
"lfs_path_per_node" : "/tmp",
Expand All @@ -245,7 +242,8 @@
"options" : ["gpumps", "nvme"],
"blocked_cores" : [],
"blocked_gpus" : []
}
},
"task_pre_exec" : ["export LD_LIBRARY_PATH=/sw/summit/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-9.1.0/spectrum-mpi-10.4.0.3-20210112-6jbupg3thjwhsabgevk6xmwhd2bbyxdc/container/../lib/pami_port:${LD_LIBRARY_PATH}"]
},

"summit_interactive": {
Expand Down Expand Up @@ -285,7 +283,8 @@
"cores_per_node" : 42,
"gpus_per_node" : 6,
"lfs_path_per_node" : "/tmp",
"lfs_size_per_node" : 0
"lfs_size_per_node" : 0,
"task_pre_exec" : ["export LD_LIBRARY_PATH=/sw/summit/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-9.1.0/spectrum-mpi-10.4.0.3-20210112-6jbupg3thjwhsabgevk6xmwhd2bbyxdc/container/../lib/pami_port:${LD_LIBRARY_PATH}"]
},

"summit_prte": {
Expand Down
4 changes: 4 additions & 0 deletions tests/unit_tests/test_executing/test_popen.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def test_handle_task(self, mocked_sp_popen, mocked_lm_init,
def test_extend_pre_exec(self, mocked_init):

pex = Popen(cfg=None, session=None)
pex._cfg = {}

td = {'cores_per_rank': 2,
'threading_type': '',
Expand All @@ -144,13 +145,16 @@ def test_extend_pre_exec(self, mocked_init):

pex._extend_pre_exec(td, ranks)
self.assertNotIn('export OMP_NUM_THREADS=2', td['pre_exec'])
self.assertFalse(bool(td['pre_exec']))

td.update({'threading_type': rpc.OpenMP,
'gpu_type' : rpc.CUDA})
pex._cfg['task_pre_exec'] = ['export TEST_ENV=test']

pex._extend_pre_exec(td, ranks)
self.assertIn('export OMP_NUM_THREADS=2', td['pre_exec'])
self.assertIn({'0': 'export CUDA_VISIBLE_DEVICES=5'}, td['pre_exec'])
self.assertIn('export TEST_ENV=test', td['pre_exec'])

# --------------------------------------------------------------------------
#
Expand Down

0 comments on commit 49f89f4

Please sign in to comment.