Skip to content

fix PMIX handling, query PMIX for remaining time if yogrt is missing #267

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ OPTION(SCR_BGQ "Enable proper BGQ compilation" OFF)
## PMIx
# Runs only when SCR_RESOURCE_MANAGER is "PMIX": locate the PMIx package
# (REQUIRED makes configuration stop if it cannot be found), then add its
# include directories and libraries to the build.
IF(${SCR_RESOURCE_MANAGER} STREQUAL "PMIX")
FIND_PACKAGE(PMIX REQUIRED)
SET(HAVE_PMIX TRUE)
# src/scr_env.c guards its `#include "pmix.h"` with a macro of this name;
# presumably the variable reaches the C sources through a generated config
# header -- TODO confirm.
SET(HAVE_LIBPMIX TRUE)
INCLUDE_DIRECTORIES(${PMIX_INCLUDE_DIRS})
# SCR_EXTERNAL_LIBS accumulates the PMIx libraries alongside any entries
# already on the list.
LIST(APPEND SCR_EXTERNAL_LIBS ${PMIX_LIBRARIES})
ENDIF(${SCR_RESOURCE_MANAGER} STREQUAL "PMIX")
Expand Down
File renamed without changes.
57 changes: 52 additions & 5 deletions src/scr_env.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@
#include "cppr.h"
#endif /* HAVE_CPPR */

#ifdef HAVE_PMIX
#ifdef HAVE_LIBPMIX
#include "pmix.h"
#endif /* HAVE_PMIX */
#endif /* HAVE_LIBPMIX */

#ifdef SCR_BGQ
#include "firmware/include/personality.h" /* Personality_t */
Expand All @@ -73,6 +73,28 @@ machine-dependent information.
*/

/* returns the number of seconds remaining in the time allocation */

#ifdef SCR_RESOURCE_MANAGER_PMIX
/* Remaining walltime in seconds as last reported by PMIx. Written only by
 * seconds_remaining_cbfunc; it keeps its previous value when a query fails
 * or returns no data. */
static uint32_t seconds_remaining;

/*
 * Completion callback passed to PMIx_Query_info_nb for the
 * PMIX_TIME_REMAINING query.
 *
 *   status         - result of the query as reported by PMIx
 *   info, ninfo    - array of returned values and its length
 *   cbdata         - address of the caller's `volatile int` wait flag;
 *                    cleared to 0 once the callback has finished
 *   release_fn,
 *   release_cbdata - optional hook that lets PMIx reclaim `info`; invoked
 *                    after the value has been copied out
 */
static void seconds_remaining_cbfunc(pmix_status_t status, pmix_info_t *info,
                                     size_t ninfo, void *cbdata,
                                     pmix_release_cbfunc_t release_fn,
                                     void *release_cbdata)
{
  volatile int *waiting = (volatile int *)cbdata;

  /* Only trust the payload when the query succeeded and actually returned
   * an entry; otherwise info[0] may be absent or meaningless. */
  if (status == PMIX_SUCCESS && info != NULL && ninfo > 0) {
    seconds_remaining = info[0].value.data.uint32;
  }

  /* hand the info array back to PMIx now that the value is copied */
  if (release_fn != NULL) {
    release_fn(release_cbdata);
  }

  /* signal the waiting caller last, after all work above is complete */
  if (waiting != NULL) {
    *waiting = 0;
  }
}
#endif

long int scr_env_seconds_remaining()
{
/* returning a negative number tells the caller this functionality is disabled */
Expand All @@ -84,6 +106,26 @@ long int scr_env_seconds_remaining()
if (secs < 0) {
secs = 0;
}
#elif defined(SCR_RESOURCE_MANAGER_PMIX)
/* query time remaining */
pmix_status_t retval;
pmix_query_t *query;
const int nq = 1;
PMIX_QUERY_CREATE(query, nq);
if (query != NULL) {
PMIX_ARGV_APPEND(retval, query[0].keys, PMIX_TIME_REMAINING);
if (retval == PMIX_SUCCESS) {
volatile int waiting = 1;
retval = PMIx_Query_info_nb(query, nq, seconds_remaining_cbfunc,
(void*)&waiting);
if (retval == PMIX_SUCCESS) {
while (waiting) {
sleep(1);
}
secs = (long int)seconds_remaining;
}
}
}
#else
char* scr_end_time = getenv("SCR_END_TIME");
if (scr_end_time) {
Expand Down Expand Up @@ -126,8 +168,8 @@ char* scr_env_jobid()
{
char* jobid = NULL;

char* value;
#ifdef SCR_RESOURCE_MANAGER_SLURM
char* value;
/* read $SLURM_JOBID environment variable for jobid string */
if ((value = getenv("SLURM_JOBID")) != NULL) {
jobid = strdup(value);
Expand All @@ -139,6 +181,7 @@ char* scr_env_jobid()
}
#endif
#ifdef SCR_RESOURCE_MANAGER_APRUN
char* value;
/* read $PBS_JOBID environment variable for jobid string */
if ((value = getenv("PBS_JOBID")) != NULL) {
jobid = strdup(value);
Expand Down Expand Up @@ -178,6 +221,7 @@ char* scr_env_jobid()
PMIX_PDATA_FREE(pmix_query_data, 1);
#endif
#ifdef SCR_RESOURCE_MANAGER_LSF
char* value;
/* read $LSB_JOBID environment variable for jobid string */
if ((value = getenv("LSB_JOBID")) != NULL) {
jobid = strdup(value);
Expand Down Expand Up @@ -263,12 +307,12 @@ int scr_env_init(void)

#ifdef SCR_RESOURCE_MANAGER_PMIX
/* init pmix */
pmix_proc_t scr_pmix_proc;
int retval = PMIx_Init(&scr_pmix_proc, NULL, 0);
if (retval != PMIX_SUCCESS) {
scr_err("PMIx_Init failed: rc=%d @ %s:%d",
scr_abort(-1, "PMIx_Init failed: rc=%d @ %s:%d",
retval, __FILE__, __LINE__
);
return SCR_FAILURE;
}
scr_dbg(1, "PMIx_Init succeeded @ %s:%d", __FILE__, __LINE__);
#endif /* SCR_RESOURCE_MANAGER_PMIX */
Expand All @@ -289,5 +333,8 @@ int scr_env_init(void)

/*
 * Tears down environment-specific state set up by scr_env_init.
 *
 * When built for the PMIx resource manager this finalizes the PMIx client.
 * Returns SCR_SUCCESS on success, or SCR_FAILURE if PMIx_Finalize reports
 * an error (the error is also logged). Builds without PMIx always return
 * SCR_SUCCESS.
 */
int scr_env_finalize(void)
{
#ifdef SCR_RESOURCE_MANAGER_PMIX
  /* do not silently drop a failed shutdown; report it like PMIx_Init */
  pmix_status_t retval = PMIx_Finalize(NULL, 0);
  if (retval != PMIX_SUCCESS) {
    scr_err("PMIx_Finalize failed: rc=%d @ %s:%d",
      (int)retval, __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }
#endif
  return SCR_SUCCESS;
}