From 2aa04ab45c8420d1ba87d8d8d23db0b499b7dbc3 Mon Sep 17 00:00:00 2001 From: Joachim Gabler Date: Wed, 2 Oct 2024 15:15:57 +0200 Subject: [PATCH] BF: CS-635 modifying pe option ign_sreq_on_mhost while a job is using the pe leads to a booking error // second issue: booking of the running job into the resource diagram in scheduler --- .../daemons/qmaster/sge_sched_prepare_data.cc | 1 + source/libs/sched/sge_resource_utilization.cc | 25 ++++++++++--------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/source/daemons/qmaster/sge_sched_prepare_data.cc b/source/daemons/qmaster/sge_sched_prepare_data.cc index c291bfbd9..f2a7f531f 100644 --- a/source/daemons/qmaster/sge_sched_prepare_data.cc +++ b/source/daemons/qmaster/sge_sched_prepare_data.cc @@ -207,6 +207,7 @@ static const int jat_nm[] = { JAT_fshare, JAT_status, JAT_granted_pe, + JAT_pe_object, JAT_scaled_usage_list, JAT_task_list, JAT_start_time, diff --git a/source/libs/sched/sge_resource_utilization.cc b/source/libs/sched/sge_resource_utilization.cc index 3e2eaff38..824e62a0c 100644 --- a/source/libs/sched/sge_resource_utilization.cc +++ b/source/libs/sched/sge_resource_utilization.cc @@ -1067,15 +1067,13 @@ rqs_add_job_utilization(lListElem *jep, const lListElem *pe, u_long32 task_id, c DRETURN(mods); } -static int -add_job_list_to_schedule(const lList *job_list, bool suspended, lList *pe_list, - lList *host_list, lList *queue_list, lList *rqs_list, - const lList *centry_list, const lList *acl_list, const lList *hgroup_list, - lList *ar_list, bool for_job_scheduling, u_long64 now) +static int +add_job_list_to_schedule(const lList *job_list, bool suspended, lList *host_list, lList *queue_list, lList *rqs_list, + const lList *centry_list, const lList *acl_list, const lList *hgroup_list, lList *ar_list, + bool for_job_scheduling, u_long64 now) { lListElem *jep, *ja_task; lListElem *gep = host_list_locate(host_list, SGE_GLOBAL_NAME); - const char *pe_name; const char *type; u_long32 interval = sconf_get_schedule_interval(); @@ -1118,10 +1116,13 @@ add_job_list_to_schedule(const lList *job_list, bool suspended, lList *pe_list, a.gdil = lGetListRW(ja_task, JAT_granted_destin_identifier_list); a.slots = nslots_granted(a.gdil, nullptr); - if ((pe_name = lGetString(ja_task, JAT_granted_pe)) && - !(a.pe = pe_list_locate(pe_list, pe_name))) { - ERROR(MSG_OBJ_UNABLE2FINDPE_S, pe_name); - continue; + a.pe = lGetObject(ja_task, JAT_pe_object); + const char *pe_name = lGetString(ja_task, JAT_granted_pe); + if (pe_name != nullptr && a.pe == nullptr) { + CRITICAL("===> granted_pe is %s but pe_object is nullptr", pe_name); + } + if (pe_name != nullptr && a.pe != nullptr && sge_strnullcmp(pe_name, lGetString(a.pe, PE_name)) != 0) { + CRITICAL("===> granted_pe is %s but pe_object is %s", pe_name, lGetString(a.pe, PE_name)); } /* no need (so far) for passing ckpt information to debit_scheduled_job() */ @@ -1190,10 +1191,10 @@ void prepare_resource_schedules(const lList *running_jobs, const lList *suspende { DENTER(TOP_LAYER); - add_job_list_to_schedule(running_jobs, false, pe_list, host_list, queue_list, + add_job_list_to_schedule(running_jobs, false, host_list, queue_list, rqs_list, centry_list, acl_list, hgroup_list, ar_list, for_job_scheduling, now); - add_job_list_to_schedule(suspended_jobs, true, pe_list, host_list, queue_list, + add_job_list_to_schedule(suspended_jobs, true, host_list, queue_list, rqs_list, centry_list, acl_list, hgroup_list, ar_list, for_job_scheduling, now); add_calendar_to_schedule(queue_list, now);