Skip to content

Commit f1403ac

Browse files
author
Ralph Castain
authored
Merge pull request #3336 from rhc54/topic/launchmon
Update the debugger launch code to reflect the new backend mapping method.
2 parents 5df9567 + 0500cc1 commit f1403ac

File tree

8 files changed

+114
-158
lines changed

8 files changed

+114
-158
lines changed

orte/mca/grpcomm/direct/grpcomm_direct.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -556,7 +556,7 @@ static void xcast_recv(int status, orte_process_name_t* sender,
556556
/* now pass the relay buffer to myself for processing - don't
557557
* inject it into the RML system via send as that will compete
558558
* with the relay messages down in the OOB. Instead, pass it
559-
* directly to the orted command processor */
559+
* directly to the RML message processor */
560560
if (ORTE_DAEMON_DVM_NIDMAP_CMD != command) {
561561
ORTE_RML_POST_MESSAGE(ORTE_PROC_MY_NAME, tag, 1,
562562
relay->base_ptr, relay->bytes_used);

orte/mca/odls/base/odls_base_default_fns.c

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -694,7 +694,7 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata)
694694
}
695695

696696
/* did the user request we display output in xterms? */
697-
if (NULL != orte_xterm) {
697+
if (NULL != orte_xterm && !ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
698698
opal_list_item_t *nmitem;
699699
orte_namelist_t *nm;
700700
/* see if this rank is one of those requested */
@@ -740,9 +740,6 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata)
740740
for (i=0; NULL != app->argv[i]; i++) {
741741
opal_argv_append_nosize(&cd->argv, app->argv[i]);
742742
}
743-
/* the app exe name itself is in the argvsav array, so
744-
* we can recover it from there later
745-
*/
746743
cd->cmd = opal_path_findv(orte_fork_agent[0], X_OK, orte_launch_environ, NULL);
747744
if (NULL == cd->cmd) {
748745
orte_show_help("help-orte-odls-base.txt",
@@ -766,7 +763,7 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata)
766763
}
767764

768765
/* if we are indexing the argv by rank, do so now */
769-
if (cd->index_argv) {
766+
if (cd->index_argv && !ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
770767
char *param;
771768
asprintf(&param, "%s-%d", cd->argv[0], (int)child->name.vpid);
772769
free(cd->argv[0]);
@@ -1805,12 +1802,6 @@ int orte_odls_base_default_restart_proc(orte_proc_t *child,
18051802
opal_event_set_priority(&cd->ev, ORTE_MSG_PRI);
18061803
opal_event_active(&cd->ev, OPAL_EV_WRITE, 1);
18071804

1808-
if (ORTE_SUCCESS != (rc = fork_local(cd))) {
1809-
orte_wait_cb_cancel(child);
1810-
child->exit_code = ORTE_ERR_SILENT; /* error message already output */
1811-
ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START);
1812-
}
1813-
18141805
CLEANUP:
18151806
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
18161807
"%s odls:restart of proc %s %s",

orte/mca/odls/odls_types.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,6 @@ typedef uint8_t orte_daemon_cmd_flag_t;
9292
/* tell DVM daemons to cleanup resources from job */
9393
#define ORTE_DAEMON_DVM_CLEANUP_JOB_CMD (orte_daemon_cmd_flag_t) 34
9494

95-
9695
/*
9796
* Struct written up the pipe from the child to the parent.
9897
*/

orte/mca/rmaps/base/rmaps_base_support_fns.c

Lines changed: 47 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -477,55 +477,60 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
477477
(int)opal_list_get_size(allocated_nodes)));
478478

479479
complete:
480+
num_slots = 0;
480481
/* remove all nodes that are already at max usage, and
481482
* compute the total number of allocated slots while
482-
* we do so */
483-
num_slots = 0;
484-
item = opal_list_get_first(allocated_nodes);
485-
while (item != opal_list_get_end(allocated_nodes)) {
486-
/** save the next pointer in case we remove this node */
487-
next = opal_list_get_next(item);
488-
/** check to see if this node is fully used - remove if so */
489-
node = (orte_node_t*)item;
490-
if (0 != node->slots_max && node->slots_inuse > node->slots_max) {
491-
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
492-
"%s Removing node %s: max %d inuse %d",
493-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
494-
node->name, node->slots_max, node->slots_inuse));
495-
opal_list_remove_item(allocated_nodes, item);
496-
OBJ_RELEASE(item); /* "un-retain" it */
497-
} else if (node->slots <= node->slots_inuse &&
498-
(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
499-
/* remove the node as fully used */
500-
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
501-
"%s Removing node %s slots %d inuse %d",
502-
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
503-
node->name, node->slots, node->slots_inuse));
504-
opal_list_remove_item(allocated_nodes, item);
505-
OBJ_RELEASE(item); /* "un-retain" it */
506-
} else if (node->slots > node->slots_inuse) {
507-
/* add the available slots */
483+
* we do so - can ignore this if we are mapping debugger
484+
* daemons as they do not count against the allocation */
485+
if (ORTE_MAPPING_DEBUGGER & ORTE_GET_MAPPING_DIRECTIVE(policy)) {
486+
num_slots = opal_list_get_size(allocated_nodes); // tell the mapper there is one slot/node for debuggers
487+
} else {
488+
item = opal_list_get_first(allocated_nodes);
489+
while (item != opal_list_get_end(allocated_nodes)) {
490+
/** save the next pointer in case we remove this node */
491+
next = opal_list_get_next(item);
492+
/** check to see if this node is fully used - remove if so */
493+
node = (orte_node_t*)item;
494+
if (0 != node->slots_max && node->slots_inuse > node->slots_max) {
508495
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
509-
"%s node %s has %d slots available",
496+
"%s Removing node %s: max %d inuse %d",
510497
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
511-
node->name, node->slots - node->slots_inuse));
512-
num_slots += node->slots - node->slots_inuse;
513-
} else if (!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
514-
/* nothing needed to do here - we don't add slots to the
515-
* count as we don't have any available. Just let the mapper
516-
* do what it needs to do to meet the request
517-
*/
498+
node->name, node->slots_max, node->slots_inuse));
499+
opal_list_remove_item(allocated_nodes, item);
500+
OBJ_RELEASE(item); /* "un-retain" it */
501+
} else if (node->slots <= node->slots_inuse &&
502+
(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
503+
/* remove the node as fully used */
518504
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
519-
"%s node %s is fully used, but available for oversubscrition",
505+
"%s Removing node %s slots %d inuse %d",
520506
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
521-
node->name));
522-
} else {
523-
/* if we cannot use it, remove it from list */
524-
opal_list_remove_item(allocated_nodes, item);
525-
OBJ_RELEASE(item); /* "un-retain" it */
507+
node->name, node->slots, node->slots_inuse));
508+
opal_list_remove_item(allocated_nodes, item);
509+
OBJ_RELEASE(item); /* "un-retain" it */
510+
} else if (node->slots > node->slots_inuse) {
511+
/* add the available slots */
512+
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
513+
"%s node %s has %d slots available",
514+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
515+
node->name, node->slots - node->slots_inuse));
516+
num_slots += node->slots - node->slots_inuse;
517+
} else if (!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
518+
/* nothing needed to do here - we don't add slots to the
519+
* count as we don't have any available. Just let the mapper
520+
* do what it needs to do to meet the request
521+
*/
522+
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
523+
"%s node %s is fully used, but available for oversubscription",
524+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
525+
node->name));
526+
} else {
527+
/* if we cannot use it, remove it from list */
528+
opal_list_remove_item(allocated_nodes, item);
529+
OBJ_RELEASE(item); /* "un-retain" it */
530+
}
531+
/** go on to next item */
532+
item = next;
526533
}
527-
/** go on to next item */
528-
item = next;
529534
}
530535

531536
/* Sanity check to make sure we have resources available */

orte/mca/rmaps/ppr/rmaps_ppr.c

Lines changed: 39 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -327,47 +327,49 @@ static int ppr_mapper(orte_job_t *jdata)
327327
}
328328
}
329329

330-
/* set the total slots used */
331-
if ((int)node->num_procs <= node->slots) {
332-
node->slots_inuse = (int)node->num_procs;
333-
} else {
334-
node->slots_inuse = node->slots;
335-
}
336-
337-
/* if no-oversubscribe was specified, check to see if
338-
* we have violated the total slot specification - regardless,
339-
* if slots_max was given, we are not allowed to violate it!
340-
*/
341-
if ((node->slots < (int)node->num_procs) ||
342-
(0 < node->slots_max && node->slots_max < (int)node->num_procs)) {
343-
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
344-
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
345-
true, node->num_procs, app->app);
346-
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
347-
rc = ORTE_ERR_SILENT;
348-
goto error;
330+
if (!(ORTE_MAPPING_DEBUGGER & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
331+
/* set the total slots used */
332+
if ((int)node->num_procs <= node->slots) {
333+
node->slots_inuse = (int)node->num_procs;
334+
} else {
335+
node->slots_inuse = node->slots;
349336
}
350-
/* flag the node as oversubscribed so that sched-yield gets
351-
* properly set
337+
338+
/* if no-oversubscribe was specified, check to see if
339+
* we have violated the total slot specification - regardless,
340+
* if slots_max was given, we are not allowed to violate it!
352341
*/
353-
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
354-
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
355-
/* check for permission */
356-
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
357-
/* if we weren't given a directive either way, then we will error out
358-
* as the #slots were specifically given, either by the host RM or
359-
* via hostfile/dash-host */
360-
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
342+
if ((node->slots < (int)node->num_procs) ||
343+
(0 < node->slots_max && node->slots_max < (int)node->num_procs)) {
344+
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
361345
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
362-
true, app->num_procs, app->app);
346+
true, node->num_procs, app->app);
363347
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
364-
return ORTE_ERR_SILENT;
365-
} else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
366-
/* if we were explicitly told not to oversubscribe, then don't */
367-
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
368-
true, app->num_procs, app->app);
369-
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
370-
return ORTE_ERR_SILENT;
348+
rc = ORTE_ERR_SILENT;
349+
goto error;
350+
}
351+
/* flag the node as oversubscribed so that sched-yield gets
352+
* properly set
353+
*/
354+
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
355+
ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
356+
/* check for permission */
357+
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
358+
/* if we weren't given a directive either way, then we will error out
359+
* as the #slots were specifically given, either by the host RM or
360+
* via hostfile/dash-host */
361+
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
362+
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
363+
true, app->num_procs, app->app);
364+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
365+
return ORTE_ERR_SILENT;
366+
} else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
367+
/* if we were explicitly told not to oversubscribe, then don't */
368+
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
369+
true, app->num_procs, app->app);
370+
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
371+
return ORTE_ERR_SILENT;
372+
}
371373
}
372374
}
373375
}

orte/mca/rmaps/rmaps_types.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved
1313
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
1414
* reserved.
15-
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
1616
* $COPYRIGHT$
1717
*
1818
* Additional copyrights may follow
@@ -91,6 +91,8 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_job_map_t);
9191
/* an error flag */
9292
#define ORTE_MAPPING_CONFLICTED 0x2000
9393
#define ORTE_MAPPING_GIVEN 0x4000
94+
/* mapping a debugger job */
95+
#define ORTE_MAPPING_DEBUGGER 0x8000
9496
#define ORTE_SET_MAPPING_DIRECTIVE(target, pol) \
9597
(target) |= (pol)
9698
#define ORTE_UNSET_MAPPING_DIRECTIVE(target, pol) \

orte/orted/orted_comm.c

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -529,7 +529,6 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
529529
}
530530
break;
531531

532-
533532
/**** TERMINATE JOB COMMAND ****/
534533
case ORTE_DAEMON_TERMINATE_JOB_CMD:
535534

orte/orted/orted_submit.c

Lines changed: 22 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -2383,15 +2383,13 @@ static void orte_debugger_dump(void)
23832383
"NULL" : (char*) MPIR_server_arguments);
23842384
}
23852385

2386-
static void setup_debugger_job(void)
2386+
static void setup_debugger_job(orte_jobid_t jobid)
23872387
{
23882388
orte_job_t *debugger;
23892389
orte_app_context_t *app;
2390-
orte_proc_t *proc;
2391-
int i, rc;
2392-
orte_node_t *node;
2393-
orte_vpid_t vpid=0;
2390+
int rc;
23942391
char cwd[OPAL_PATH_MAX];
2392+
bool flag = true;
23952393

23962394
/* setup debugger daemon job */
23972395
debugger = OBJ_NEW(orte_job_t);
@@ -2427,68 +2425,28 @@ static void setup_debugger_job(void)
24272425
return;
24282426
}
24292427
app->cwd = strdup(cwd);
2430-
orte_remove_attribute(&app->attributes, ORTE_APP_USER_CWD);
2428+
orte_set_attribute(&app->attributes, ORTE_APP_USER_CWD, ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL);
24312429
opal_argv_append_nosize(&app->argv, app->app);
24322430
build_debugger_args(app);
24332431
opal_pointer_array_add(debugger->apps, app);
24342432
debugger->num_apps = 1;
2435-
/* create a job map */
2433+
/* create the map object and set the policy to 1ppn */
24362434
debugger->map = OBJ_NEW(orte_job_map_t);
2437-
/* in building the map, we want to launch one debugger daemon
2438-
* on each node that *already has an application process on it*.
2439-
* We cannot just launch one debugger daemon on EVERY node because
2440-
* the original job may not have placed procs on every node. So
2441-
* we construct the map here by cycling across all nodes, adding
2442-
* only those nodes where num_procs > 0.
2443-
*/
2444-
for (i=0; i < orte_node_pool->size; i++) {
2445-
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
2446-
continue;
2447-
}
2448-
/* if this node wasn't included in the vm, ignore it */
2449-
if (NULL == node->daemon) {
2450-
continue;
2451-
}
2452-
/* if the node doesn't have any app procs on it, ignore it */
2453-
if (node->num_procs < 1) {
2454-
continue;
2455-
}
2456-
/* this node has at least one proc, so add it to our map */
2457-
OBJ_RETAIN(node);
2458-
opal_pointer_array_add(debugger->map->nodes, node);
2459-
debugger->map->num_nodes++;
2460-
/* add a debugger daemon to the node - note that the
2461-
* debugger daemon does NOT count against our subscribed slots
2462-
*/
2463-
proc = OBJ_NEW(orte_proc_t);
2464-
proc->name.jobid = debugger->jobid;
2465-
proc->name.vpid = vpid++;
2466-
/* point the proc at the local ORTE daemon as its parent */
2467-
proc->parent = node->daemon->name.vpid;
2468-
/* set the local/node ranks - we don't actually care
2469-
* what these are, but the odls needs them
2470-
*/
2471-
proc->local_rank = 0;
2472-
proc->node_rank = 0;
2473-
proc->app_rank = proc->name.vpid;
2474-
/* flag the proc as ready for launch */
2475-
proc->state = ORTE_PROC_STATE_INIT;
2476-
proc->app_idx = 0;
2477-
2478-
OBJ_RETAIN(node); /* maintain accounting on object */
2479-
proc->node = node;
2480-
/* add the proc to the job */
2481-
opal_pointer_array_set_item(debugger->procs, proc->name.vpid, proc);
2482-
debugger->num_procs++;
2483-
2484-
/* add the proc to the node's array */
2485-
OBJ_RETAIN(proc);
2486-
opal_pointer_array_add(node->procs, (void*)proc);
2487-
node->num_procs++;
2488-
}
2489-
/* schedule it for launch */
2490-
debugger->state = ORTE_JOB_STATE_INIT;
2491-
ORTE_ACTIVATE_JOB_STATE(debugger, ORTE_JOB_STATE_LAUNCH_APPS);
2435+
ORTE_SET_MAPPING_POLICY(debugger->map->mapping, ORTE_MAPPING_PPR);
2436+
ORTE_SET_MAPPING_DIRECTIVE(debugger->map->mapping, ORTE_MAPPING_GIVEN);
2437+
ORTE_SET_MAPPING_DIRECTIVE(debugger->map->mapping, ORTE_MAPPING_DEBUGGER);
2438+
/* define the ppr */
2439+
debugger->map->ppr = strdup("1:node");
2440+
/* mark that we do not want the daemon bound */
2441+
if (ORTE_SUCCESS != (rc = opal_hwloc_base_set_binding_policy(&debugger->map->binding, "none"))) {
2442+
ORTE_ERROR_LOG(rc);
2443+
return;
2444+
}
2445+
/* spawn it */
2446+
rc = orte_plm.spawn(debugger);
2447+
if (ORTE_SUCCESS != rc) {
2448+
ORTE_ERROR_LOG(rc);
2449+
}
24922450
}
24932451

24942452
/*
@@ -2644,7 +2602,7 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
26442602
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
26452603
(NULL == orte_debugger_test_daemon) ?
26462604
MPIR_executable_path : orte_debugger_test_daemon);
2647-
setup_debugger_job();
2605+
setup_debugger_job(jdata->jobid);
26482606
}
26492607
/* we don't have anything else to do */
26502608
OBJ_RELEASE(caddy);
@@ -2936,7 +2894,7 @@ static void attach_debugger(int fd, short event, void *arg)
29362894
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
29372895
(NULL == orte_debugger_test_daemon) ?
29382896
MPIR_executable_path : orte_debugger_test_daemon);
2939-
setup_debugger_job();
2897+
setup_debugger_job(ORTE_JOBID_WILDCARD);
29402898
did_once = true;
29412899
}
29422900

0 commit comments

Comments
 (0)