@@ -11,7 +11,7 @@
  * All rights reserved.
  * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
  *                         reserved.
- * Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
  * Copyright (c) 2015      Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
@@ -30,6 +30,7 @@
 #include "opal/util/if.h"

 #include "orte/mca/errmgr/errmgr.h"
+#include "orte/mca/rmaps/base/base.h"
 #include "orte/util/name_fns.h"
 #include "orte/runtime/orte_globals.h"

@@ -46,7 +47,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
     int rc, i;
     orte_node_t *node, *hnp_node, *nptr;
     char *ptr;
-    bool hnp_alone = true;
+    bool hnp_alone = true, skiphnp = false;
     orte_attribute_t *kv;
     char **alias = NULL, **nalias;

@@ -77,6 +78,33 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)

     /* get the hnp node's info */
     hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
+#if SLURM_CRAY_ENV
+    /* if we are in a Cray-SLURM environment, then we cannot
+     * launch procs local to the HNP. The problem is that the
+     * MPI processes launched on the head node (where
+     * ORTE_PROC_IS_HNP evaluates to true) get launched by a
+     * daemon (mpirun) which is not a child of a slurmd daemon.
+     * This means that any RDMA credentials obtained via the
+     * odls/alps local launcher are incorrect. Test for this
+     * condition. If found, then take steps to ensure we launch
+     * a daemon on the same node as mpirun and that it gets
+     * used to fork local procs instead of mpirun so they get
+     * the proper credential */
+    if (NULL != hnp_node) {
+        OPAL_LIST_FOREACH(node, nodes, orte_node_t) {
+            if (orte_ifislocal(node->name)) {
+                orte_hnp_is_allocated = true;
+                break;
+            }
+        }
+        if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) {
+            hnp_node->name = strdup("mpirun");
+            skiphnp = true;
+            ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL);
+        }
+    }
+#endif
+

     /* cycle through the list */
     while (NULL != (item = opal_list_remove_first(nodes))) {
@@ -86,7 +114,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
          * first position since it is the first one entered. We need to check to see
          * if this node is the same as the HNP's node so we don't double-enter it
          */
-        if (NULL != hnp_node && orte_ifislocal(node->name)) {
+        if (!skiphnp && NULL != hnp_node && orte_ifislocal(node->name)) {
             OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                                  "%s ras:base:node_insert updating HNP [%s] info to %ld slots",
                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@@ -189,7 +217,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
      * ensure we don't have any domain info in the node record
      * for the hnp
      */
-    if (!orte_have_fqdn_allocation && !hnp_alone) {
+    if (NULL != hnp_node && !orte_have_fqdn_allocation && !hnp_alone) {
         if (NULL != (ptr = strchr(hnp_node->name, '.'))) {
             *ptr = '\0';
         }
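
The core of the new #if SLURM_CRAY_ENV block is a test-and-set on the mapping directives: if mpirun's own node appears in the allocation and the user has not already asked ORTE to avoid local launches, the code sets skiphnp and forces ORTE_MAPPING_NO_USE_LOCAL so the mapper keeps application ranks off the mpirun node. The standalone sketch below mirrors that pattern; the flag value, mask, and macro names are placeholders chosen only to illustrate the bit-flag mechanics, not ORTE's actual constants (the real definitions live in ORTE's rmaps headers).

/* minimal sketch of the directive test-and-set; all values are
 * placeholders, not ORTE's actual constants */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint16_t mapping_policy_t;

#define MAPPING_NO_USE_LOCAL            0x1000   /* placeholder flag */
#define MAPPING_DIRECTIVE_MASK          0xf000   /* placeholder mask */
#define GET_MAPPING_DIRECTIVE(pol)      ((pol) & MAPPING_DIRECTIVE_MASK)
#define SET_MAPPING_DIRECTIVE(pol, dir) ((pol) |= (dir))

int main(void)
{
    mapping_policy_t mapping = 0;   /* stand-in for orte_rmaps_base.mapping */
    bool hnp_is_allocated = true;   /* stand-in for orte_hnp_is_allocated */
    bool skiphnp = false;

    /* same shape as the diff's condition: only force the directive when
     * the HNP's node is part of the allocation and the user has not
     * already requested that local launches be avoided */
    if (hnp_is_allocated &&
        !(GET_MAPPING_DIRECTIVE(mapping) & MAPPING_NO_USE_LOCAL)) {
        skiphnp = true;
        SET_MAPPING_DIRECTIVE(mapping, MAPPING_NO_USE_LOCAL);
    }

    printf("skiphnp=%d no_use_local=%d\n", skiphnp,
           0 != (GET_MAPPING_DIRECTIVE(mapping) & MAPPING_NO_USE_LOCAL));
    return 0;
}

With the directive set, the node-local ranks are forked by a separate orted that is a child of slurmd, so they pick up valid RDMA credentials; skiphnp additionally keeps the merge loop in node_insert from folding the allocation's head-node entry into the HNP's record, which is what causes that extra daemon to be launched.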