Skip to content

Commit 9164afb

Browse files
author
Ralph Castain
committed
When a daemon force-terminates, we don't get the show_help message it was trying to send because the message is at a lower priority than the termination event. Resolve this by putting the OOB in its own progress thread. Also, use only that one thread by default; if someone needs more progress threads in the OOB, they can use the MCA param to get them.
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
1 parent 0650d41 commit 9164afb

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

orte/mca/oob/base/oob_base_frame.c

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -55,11 +55,7 @@ orte_oob_base_t orte_oob_base = {0};
5555

5656
static int orte_oob_base_register(mca_base_register_flag_t flags)
5757
{
58-
if (ORTE_PROC_IS_APP || ORTE_PROC_IS_TOOL) {
59-
orte_oob_base.num_threads = 0;
60-
} else {
61-
orte_oob_base.num_threads = 8;
62-
}
58+
orte_oob_base.num_threads = 0;
6359
(void)mca_base_var_register("orte", "oob", "base", "num_progress_threads",
6460
"Number of independent progress OOB messages for each interface",
6561
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
@@ -95,6 +91,10 @@ static int orte_oob_base_close(void)
9591
OBJ_RELEASE(cli);
9692
}
9793

94+
if (!ORTE_PROC_IS_APP && !ORTE_PROC_IS_TOOL) {
95+
opal_progress_thread_finalize("OOB-BASE");
96+
}
97+
9898
/* destruct our internal lists */
9999
OBJ_DESTRUCT(&orte_oob_base.actives);
100100

@@ -122,7 +122,11 @@ static int orte_oob_base_open(mca_base_open_flag_t flags)
122122
opal_hash_table_init(&orte_oob_base.peers, 128);
123123
OBJ_CONSTRUCT(&orte_oob_base.actives, opal_list_t);
124124

125-
orte_oob_base.ev_base = orte_event_base;
125+
if (ORTE_PROC_IS_APP || ORTE_PROC_IS_TOOL) {
126+
orte_oob_base.ev_base = orte_event_base;
127+
} else {
128+
orte_oob_base.ev_base = opal_progress_thread_init("OOB-BASE");
129+
}
126130

127131

128132
#if OPAL_ENABLE_FT_CR == 1

0 commit comments

Comments
 (0)