Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 68 additions & 56 deletions orte/mca/odls/base/odls_base_default_fns.c
Original file line number Diff line number Diff line change
Expand Up @@ -1361,21 +1361,22 @@ void odls_base_default_wait_local_proc(orte_proc_t *proc, void* cbdata)
}

typedef struct {
opal_list_item_t super;
orte_proc_t *child;
orte_odls_base_kill_local_fn_t kill_local;
} odls_kill_caddy_t;

static void kill_cbfunc(int fd, short args, void *cbdata)
} orte_odls_quick_caddy_t;
static void qcdcon(orte_odls_quick_caddy_t *p)
{
odls_kill_caddy_t *cd = (odls_kill_caddy_t*)cbdata;

if (!ORTE_FLAG_TEST(cd->child, ORTE_PROC_FLAG_ALIVE) || 0 == cd->child->pid) {
free(cd);
return;
p->child = NULL;
}
static void qcddes(orte_odls_quick_caddy_t *p)
{
if (NULL != p->child) {
OBJ_RELEASE(p->child);
}
cd->kill_local(cd->child->pid, SIGKILL);
free(cd);
}
OBJ_CLASS_INSTANCE(orte_odls_quick_caddy_t,
opal_list_item_t,
qcdcon, qcddes);

int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
orte_odls_base_kill_local_fn_t kill_local,
Expand All @@ -1387,6 +1388,7 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
int i, j;
opal_pointer_array_t procarray, *procptr;
bool do_cleanup;
orte_odls_quick_caddy_t *cd;

OBJ_CONSTRUCT(&procs_killed, opal_list_t);

Expand Down Expand Up @@ -1503,68 +1505,78 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
*/
orte_wait_cb_cancel(child);

if (!do_cleanup) {
odls_kill_caddy_t *cd;

/* if we are killing only selected procs, then do so in a gentle
fashion. First send a SIGCONT in case the process is in stopped state.
If it is in a stopped state and we do not first change it to
running, then SIGTERM will not get delivered. Ignore return
value. */
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
"%s SENDING SIGCONT TO %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&child->name)));
kill_local(child->pid, SIGCONT);
/* First send a SIGCONT in case the process is in stopped state.
If it is in a stopped state and we do not first change it to
running, then SIGTERM will not get delivered. Ignore return
value. */
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
"%s SENDING SIGCONT TO %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&child->name)));
cd = OBJ_NEW(orte_odls_quick_caddy_t);
OBJ_RETAIN(child);
cd->child = child;
opal_list_append(&procs_killed, &cd->super);
kill_local(child->pid, SIGCONT);
continue;

/* Send a sigterm to the process before sigkill to be nice */
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
"%s SENDING SIGTERM TO %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&child->name)));
kill_local(child->pid, SIGTERM);
/* provide a polite delay so the proc has a chance to react */
cd = (odls_kill_caddy_t*)malloc(sizeof(odls_kill_caddy_t));
OBJ_RETAIN(child); // protect against race conditions
cd->child = child;
cd->kill_local = kill_local;
ORTE_TIMER_EVENT(1, 0, kill_cbfunc, ORTE_SYS_PRI);
continue;
CLEANUP:
/* ensure the child's session directory is cleaned up */
orte_session_dir_finalize(&child->name);
/* check for everything complete - this will remove
* the child object from our local list
*/
if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_IOF_COMPLETE) &&
ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_WAITPID)) {
ORTE_ACTIVATE_PROC_STATE(&child->name, child->state);
}
}
}

/* Force the SIGKILL just to make sure things are dead
* This fixes an issue that, if the application is masking
* SIGTERM, then the child_died()
* may return 'true' even though waitpid returns with 0.
* It does this to avoid a race condition, per documentation
* in odls_default_module.c.
*/
/* if we are issuing signals, then we need to wait a little
* and send the next in sequence */
if (0 < opal_list_get_size(&procs_killed)) {
sleep(orte_odls_globals.timeout_before_sigkill);
/* issue a SIGTERM to all */
OPAL_LIST_FOREACH(cd, &procs_killed, orte_odls_quick_caddy_t) {
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
"%s SENDING FORCE SIGKILL TO %s pid %lu",
"%s SENDING SIGTERM TO %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&child->name), (unsigned long)child->pid));
kill_local(child->pid, SIGKILL);

ORTE_NAME_PRINT(&child->name)));
kill_local(cd->child->pid, SIGTERM);
}
/* wait a little again */
sleep(orte_odls_globals.timeout_before_sigkill);
/* issue a SIGKILL to all */
OPAL_LIST_FOREACH(cd, &procs_killed, orte_odls_quick_caddy_t) {
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
"%s SENDING SIGKILL TO %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&child->name)));
kill_local(cd->child->pid, SIGKILL);
/* indicate the waitpid fired as this is effectively what
* has happened
*/
ORTE_FLAG_SET(child, ORTE_PROC_FLAG_WAITPID);
child->pid = 0;
ORTE_FLAG_SET(cd->child, ORTE_PROC_FLAG_WAITPID);
cd->child->pid = 0;

/* mark the child as "killed" */
cd->child->state = ORTE_PROC_STATE_KILLED_BY_CMD; /* we ordered it to die */

CLEANUP:
/* ensure the child's session directory is cleaned up */
orte_session_dir_finalize(&child->name);
orte_session_dir_finalize(&cd->child->name);
/* check for everything complete - this will remove
* the child object from our local list
*/
if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_IOF_COMPLETE) &&
ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_WAITPID)) {
ORTE_ACTIVATE_PROC_STATE(&child->name, child->state);
if (ORTE_FLAG_TEST(cd->child, ORTE_PROC_FLAG_IOF_COMPLETE) &&
ORTE_FLAG_TEST(cd->child, ORTE_PROC_FLAG_WAITPID)) {
ORTE_ACTIVATE_PROC_STATE(&cd->child->name, cd->child->state);
}
}
}
OPAL_LIST_DESTRUCT(&procs_killed);

/* cleanup, if required */
/* cleanup arrays, if required */
if (do_cleanup) {
OBJ_DESTRUCT(&procarray);
OBJ_DESTRUCT(&proctmp);
Expand Down
44 changes: 22 additions & 22 deletions orte/mca/odls/default/odls_default_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
*
* $COPYRIGHT$
*
Expand Down Expand Up @@ -194,18 +194,18 @@ static bool odls_default_child_died(orte_proc_t *child)
* that occasionally causes us to incorrectly report a proc
* as refusing to die. Unfortunately, errno may not be reset
* by waitpid in this case, so we cannot check it.
*
* (note the previous fix to this, to return 'process dead'
* here, fixes the race condition at the cost of reporting
* all live processes have immediately died! Better to
* occasionally report a dead process as still living -
* which will occasionally trip the timeout for cases that
* are right on the edge.)
*
* (note the previous fix to this, to return 'process dead'
* here, fixes the race condition at the cost of reporting
* all live processes have immediately died! Better to
* occasionally report a dead process as still living -
* which will occasionally trip the timeout for cases that
* are right on the edge.)
*/
OPAL_OUTPUT_VERBOSE((20, orte_odls_base_framework.framework_output,
"%s odls:default:WAITPID INDICATES PID %d MAY HAVE ALREADY EXITED",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)(child->pid)));
/* Do nothing, process still alive */
/* Do nothing, process still alive */
} else if (-1 == ret && ECHILD == errno) {
/* The pid no longer exists, so we'll call this "good
enough for government work" */
Expand Down Expand Up @@ -392,12 +392,12 @@ static int do_child(orte_app_context_t* context,
long fd, fdmax = sysconf(_SC_OPEN_MAX);
char *param, *msg;

if (orte_forward_job_control) {
/* Set a new process group for this child, so that a
SIGSTOP can be sent to it without being sent to the
orted. */
setpgid(0, 0);
}
#if HAVE_SETPGID
/* Set a new process group for this child, so that a
SIGSTOP can be sent to it without being sent to the
orted. */
setpgid(0, 0);
#endif

/* Setup the pipe to be close-on-exec */
opal_fd_set_cloexec(write_fd);
Expand Down Expand Up @@ -710,7 +710,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
}

if (pid == 0) {
close(p[0]);
close(p[0]);
#if HAVE_SETPGID
setpgid(0, 0);
#endif
Expand Down Expand Up @@ -760,11 +760,12 @@ static int send_signal(pid_t pid, int signal)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
signal, (long)pid));

if (orte_forward_job_control) {
/* Send the signal to the process group rather than the
process. The child is the leader of its process group. */
pid = -pid;
}
#if HAVE_SETPGID
/* Send the signal to the process group rather than the
process. The child is the leader of its process group. */
pid = -pid;
#endif

if (kill(pid, signal) != 0) {
switch(errno) {
case EINVAL:
Expand Down Expand Up @@ -811,4 +812,3 @@ static int orte_odls_default_restart_proc(orte_proc_t *child)
}
return rc;
}