Skip to content

Commit

Permalink
BF: CS-649 an invalid epilog does not set a queue error state
Browse files (browse the repository at this point in the history)
// + fixed typos + cleanup
  • Loading branch information
jgabler-hpc committed Oct 4, 2024
1 parent 7dce9b3 commit 78935a9
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 13 deletions.
8 changes: 4 additions & 4 deletions source/daemons/execd/reaper_execd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,10 @@ static int clean_up_job(lListElem *jr, int failed, int shepherd_exit_status,
 case SSTATE_PROLOG_FAILED:
 case SSTATE_BEFORE_PESTART:
 case SSTATE_PESTART_FAILED:
+case SSTATE_BEFORE_PESTOP:
+case SSTATE_PESTOP_FAILED:
+case SSTATE_BEFORE_EPILOG:
+case SSTATE_EPILOG_FAILED:
 general_failure = GFSTATE_QUEUE;
 lSetUlong(jr, JR_general_failure, general_failure);
 job_related_adminmail(EXECD, jr, is_array, job_owner);
Expand Down Expand Up @@ -770,10 +774,6 @@ static int clean_up_job(lListElem *jr, int failed, int shepherd_exit_status,
 ** if an error occurred after the job has been run
 ** it is not as serious
 */
-case SSTATE_BEFORE_PESTOP:
-case SSTATE_PESTOP_FAILED:
-case SSTATE_BEFORE_EPILOG:
-case SSTATE_EPILOG_FAILED:
 case SSTATE_PROCSET_NOTFREED:
 general_failure = GFSTATE_NO_HALT;
 lSetUlong(jr, JR_general_failure, general_failure);
Expand Down
2 changes: 1 addition & 1 deletion source/daemons/shepherd/builtin_starter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1420,7 +1420,7 @@ int use_starter_method /* If this flag is set the shellpath contains the
 execvp(filename, args);
 }

-/* Aaaah - execvp() failed */
+// execvp() failed
 {
 char failed_str[2048+128];
 snprintf(failed_str, sizeof(failed_str), "%s failed: %s", err_str, strerror(errno));
Expand Down
15 changes: 7 additions & 8 deletions source/daemons/shepherd/shepherd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -453,10 +453,9 @@ static int do_epilog(int timeout, int ckpt_type)
 prolog_epilog_variables);
 exit_status = start_child("epilog", command, nullptr, timeout, ckpt_type);
 if (n_exit_status<(i=count_exit_status())) {
-shepherd_trace("exit states increased from %d to %d",
-n_exit_status, i);
+shepherd_trace("exit states increased from %d to %d", n_exit_status, i);
 /*
-** in this case the child didnt get to the exec call or it failed
+** in this case the child didn't get to the exec call, or it failed
 ** the status that waitpid and finally start_child returns is
 ** reserved for the exit status of the job
 */
Expand All @@ -465,7 +464,7 @@ static int do_epilog(int timeout, int ckpt_type)
 }

 if (exit_status) {
-switch( exit_status ) {
+switch(exit_status) {
 case RESCHEDULE_EXIT_STATUS:
 shepherd_state = SSTATE_AGAIN;
 break;
Expand All @@ -478,10 +477,10 @@ static int do_epilog(int timeout, int ckpt_type)
 shepherd_error(0, "exit_status of epilog = %d", exit_status);
 return SSTATE_EPILOG_FAILED;
 }
-}
-else
+} else {
 shepherd_trace("no epilog script to start");
-
+}
+
 return 0;
 }

Expand Down Expand Up @@ -1348,7 +1347,7 @@ int ckpt_type
 */
 if (!SGE_STAT("exit_status", &buf) && buf.st_size) {
 /*
-** in this case the child didnt get to the exec call or it failed
+** in this case the child didn't get to the exec call, or it failed
 ** the status that waitpid and finally start_child returns is
 ** reserved for the exit status of the job
 */
Expand Down

0 comments on commit 78935a9

Please sign in to comment.