|
69 | 69 | #include "orte/mca/odls/base/base.h" |
70 | 70 | #include "orte/mca/plm/plm.h" |
71 | 71 | #include "orte/mca/plm/base/plm_private.h" |
| 72 | +#include "orte/mca/rmaps/rmaps_types.h" |
72 | 73 | #include "orte/mca/routed/routed.h" |
73 | 74 | #include "orte/mca/ess/ess.h" |
74 | 75 | #include "orte/mca/state/state.h" |
@@ -122,6 +123,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, |
122 | 123 | opal_pstats_t pstat; |
123 | 124 | char *rtmod; |
124 | 125 | char *coprocessors; |
| 126 | + orte_job_map_t *map; |
125 | 127 |
|
126 | 128 | /* unpack the command */ |
127 | 129 | n = 1; |
@@ -557,6 +559,66 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, |
557 | 559 | } |
558 | 560 | break; |
559 | 561 |
|
| 562 | + |
| 563 | + /**** DVM CLEANUP JOB COMMAND ****/ |
| 564 | + case ORTE_DAEMON_DVM_CLEANUP_JOB_CMD: |
| 565 | + /* unpack the jobid */ |
| 566 | + n = 1; |
| 567 | + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job, &n, ORTE_JOBID))) { |
| 568 | + ORTE_ERROR_LOG(ret); |
| 569 | + goto CLEANUP; |
| 570 | + } |
| 571 | + |
| 572 | + /* look up job data object */ |
| 573 | + if (NULL == (jdata = orte_get_job_data_object(job))) { |
| 574 | + /* we can safely ignore this request as the job |
| 575 | + * was already cleaned up */ |
| 576 | + goto CLEANUP; |
| 577 | + } |
| 578 | + |
| 579 | + /* if we have any local children for this job, then we |
| 580 | + * can ignore this request as we would have already |
| 581 | + * dealt with it */ |
| 582 | + if (0 < jdata->num_local_procs) { |
| 583 | + goto CLEANUP; |
| 584 | + } |
| 585 | + |
| 586 | + /* release all resources (even those on other nodes) that we |
| 587 | + * assigned to this job */ |
| 588 | + if (NULL != jdata->map) { |
| 589 | + map = (orte_job_map_t*)jdata->map; |
| 590 | + for (n = 0; n < map->nodes->size; n++) { |
| 591 | + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, n))) { |
| 592 | + continue; |
| 593 | + } |
| 594 | + for (i = 0; i < node->procs->size; i++) { |
| 595 | + if (NULL == (proct = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) { |
| 596 | + continue; |
| 597 | + } |
| 598 | + if (proct->name.jobid != jdata->jobid) { |
| 599 | + /* skip procs from another job */ |
| 600 | + continue; |
| 601 | + } |
| 602 | + node->slots_inuse--; |
| 603 | + node->num_procs--; |
| 604 | + /* set the entry in the node array to NULL */ |
| 605 | + opal_pointer_array_set_item(node->procs, i, NULL); |
| 606 | + /* release the proc once for the map entry */ |
| 607 | + OBJ_RELEASE(proct); |
| 608 | + } |
| 609 | + /* set the node location to NULL */ |
| 610 | + opal_pointer_array_set_item(map->nodes, n, NULL); |
| 611 | + /* maintain accounting */ |
| 612 | + OBJ_RELEASE(node); |
| 613 | + /* flag that the node is no longer in a map */ |
| 614 | + ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); |
| 615 | + } |
| 616 | + OBJ_RELEASE(map); |
| 617 | + jdata->map = NULL; |
| 618 | + } |
| 619 | + break; |
| 620 | + |
| 621 | + |
560 | 622 | /**** REPORT TOPOLOGY COMMAND ****/ |
561 | 623 | case ORTE_DAEMON_REPORT_TOPOLOGY_CMD: |
562 | 624 | answer = OBJ_NEW(opal_buffer_t); |
@@ -1337,6 +1399,9 @@ static char *get_orted_comm_cmd_str(int command) |
1337 | 1399 | case ORTE_DAEMON_GET_MEMPROFILE: |
1338 | 1400 | return strdup("ORTE_DAEMON_GET_MEMPROFILE"); |
1339 | 1401 |
|
| 1402 | + case ORTE_DAEMON_DVM_CLEANUP_JOB_CMD: |
| 1403 | + return strdup("ORTE_DAEMON_DVM_CLEANUP_JOB_CMD"); |
| 1404 | + |
1340 | 1405 | default: |
1341 | 1406 | return strdup("Unknown Command!"); |
1342 | 1407 | } |
|
0 commit comments