Skip to content

Commit

Permalink
BF: CS-653 a tightly integrated parallel job running in a pe with job…
Browse files Browse the repository at this point in the history
…_is_first_task = FALSE and with limits set might get killed erroneously
  • Loading branch information
jgabler-hpc committed Oct 5, 2024
1 parent 78935a9 commit b81dfd3
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 8 deletions.
3 changes: 3 additions & 0 deletions doc/markdown/man/man5/sge_conf.md
Original file line number Diff line number Diff line change
Expand Up @@ -907,6 +907,9 @@ enabled in the parallel environment, reserved usage will only be reported by the
No per parallel task usage records will be sent from execd to qmaster, which can significantly reduce load on
qmaster when running large tightly integrated parallel jobs.

Note: This setting affects only the usage reporting. It does not affect the monitoring of limits, which is still based
on the real usage of the job.

***USE_QSUB_GID***

If this parameter is set to true, the primary group ID active when a job was submitted will be used as the primary
Expand Down
26 changes: 22 additions & 4 deletions source/daemons/execd/execd_ck_to_do.cc
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ static void force_job_rlimit(const char* qualified_hostname)
lFreeList(&usage_list);
cpu_ep = vmem_ep = nullptr;

bool first_gdil_ep = true;
for_each_ep(gdil_ep, lGetList(jatep, JAT_granted_destin_identifier_list)) {
int nslots=0;
double lim;
char err_str[128];
size_t err_size = sizeof(err_str) - 1;
Expand All @@ -236,7 +236,25 @@ static void force_job_rlimit(const char* qualified_hostname)
continue;
}

nslots = lGetUlong(gdil_ep, JG_slots);
int nslots = lGetUlong(gdil_ep, JG_slots);

// In case of a parallel job:
// if job_is_first_task is false, we have no slot for the master task,
// but the master task can still consume vmem and cpu.
// Problem (@todo CS-547): we cannot differentiate between
// - only the master task is running on this host (nslots = 1)
// - the master task + one slave task are running on this host (nslots = 1)
// Only in the second case would we have to increase nslots,
// but it is better to always increase it than to kill the job erroneously.
if (first_gdil_ep) {
first_gdil_ep = false;
const lListElem *pe = lGetObject(jatep, JAT_pe_object);
if (pe != nullptr) {
if (!lGetBool(pe, PE_job_is_first_task)) {
nslots++;
}
}
}

parse_ulong_val(&lim, nullptr, TYPE_TIM, lGetString(q, QU_s_cpu), err_str, err_size);
if (lim == DBL_MAX) {
Expand All @@ -252,14 +270,14 @@ static void force_job_rlimit(const char* qualified_hostname)
h_cpu += lim * nslots;
}

parse_ulong_val(&lim, nullptr, TYPE_TIM, lGetString(q, QU_s_vmem), err_str, err_size);
parse_ulong_val(&lim, nullptr, TYPE_MEM, lGetString(q, QU_s_vmem), err_str, err_size);
if (lim == DBL_MAX) {
s_vmem = DBL_MAX;
} else {
s_vmem += lim * nslots;
}

parse_ulong_val(&lim, nullptr, TYPE_TIM, lGetString(q, QU_h_vmem), err_str, err_size);
parse_ulong_val(&lim, nullptr, TYPE_MEM, lGetString(q, QU_h_vmem), err_str, err_size);
if (lim == DBL_MAX) {
h_vmem = DBL_MAX;
} else {
Expand Down
5 changes: 3 additions & 2 deletions source/daemons/execd/load_avg.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1018,7 +1018,7 @@ calculate_reserved_usage(const char* qualified_hostname, const lListElem *ja_tas

/* We only build new reserved online usage,
* when the final (acct) usage has not yet been generated.
* Otherwise online usage might get higher than final usage!
* Otherwise, online usage might get higher than final usage!
*/
jr = get_job_report(job_id, ja_task_id, pe_task_id);
if (lGetSubStr(jr, UA_name, USAGE_ATTR_CPU_ACCT, JR_usage) == nullptr) {
Expand Down Expand Up @@ -1161,7 +1161,7 @@ static void get_reserved_usage(const char *qualified_hostname, lList **job_usage
*/
pe = lGetObject(ja_task, JAT_pe_object);

/* If we have a pid for the ja_task: it's either a non parallel job
/* If we have a pid for the ja_task: it's either a non-parallel job
* or the master task of a parallel job.
* Produce a usage record for it.
*/
Expand Down Expand Up @@ -1320,6 +1320,7 @@ void build_reserved_usage(const u_long64 now, const lListElem *ja_task, const lL
for_each_ep(gdil_ep, gdil) {
slots = lGetUlong(gdil_ep, JG_slots);
/* respect job_is_first_task, only once (for the master task gdil) */
// @todo CS-547 we cannot differentiate master task only and master task + 1 slave task
if (pe != nullptr && gdil_ep == master_gdil_ep && !lGetBool(pe, PE_job_is_first_task)) {
slots++;
}
Expand Down
3 changes: 1 addition & 2 deletions source/daemons/execd/ptf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,7 @@ static lListElem *ptf_process_job(osjobid_t os_job_id, const char *task_id_str,
static void ptf_get_usage_from_data_collector()
{
#ifdef USE_DC
DENTER(TOP_LAYER);

lListElem *job, *osjob;
lList *pidlist, *oldpidlist;
Expand All @@ -695,8 +696,6 @@ static void ptf_get_usage_from_data_collector()
const char *tid;
int i, j;

DENTER(TOP_LAYER);

ojobs = jobs = psGetAllJobs();
if (jobs) {
jobcount = *(uint64 *) jobs;
Expand Down

0 comments on commit b81dfd3

Please sign in to comment.