Skip to content

Commit a0d4106

Browse files
committed
osc/rdma: Fix some bugs running with btl/tcp.
- Make sure peer->state_endpoint is set correctly.
- Fix double free of pending_op in ompi_osc_rdma_btl_fop() and ompi_osc_rdma_btl_op(). [Note: in the diff shown below, the second hunk in osc_rdma_lock.h falls inside ompi_osc_rdma_btl_cswap().]

Cleanup/leaks:
- Don't parse ompi_osc_rdma_btl_alternate_names twice.
- Free temp in allocate_state_shared().

Signed-off-by: Austen Lauria <awlauria@us.ibm.com>
1 parent ff1ba01 commit a0d4106

File tree

2 files changed

+10
-20
lines changed

2 files changed

+10
-20
lines changed

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 9 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -581,7 +581,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
581581
int my_rank = ompi_comm_rank (module->comm);
582582
int global_size = ompi_comm_size (module->comm);
583583
ompi_osc_rdma_region_t *state_region;
584-
struct _local_data *temp;
584+
struct _local_data *temp = NULL;
585585
char *data_file;
586586
int page_size = opal_getpagesize();
587587

@@ -624,13 +624,12 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
624624
size += OPAL_ALIGN_PAD_AMOUNT(size, page_size);
625625
}
626626

627-
do {
628-
temp = calloc (local_size, sizeof (temp[0]));
629-
if (NULL == temp) {
630-
ret = OMPI_ERR_OUT_OF_RESOURCE;
631-
break;
632-
}
627+
temp = calloc (local_size, sizeof (temp[0]));
628+
if (NULL == temp) {
629+
return OMPI_ERR_OUT_OF_RESOURCE;
630+
}
633631

632+
do {
634633
temp[local_rank].rank = my_rank;
635634
temp[local_rank].size = size;
636635

@@ -788,10 +787,8 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
788787
peer->state_handle = (mca_btl_base_registration_handle_t *) state_region->btl_handle_data;
789788
}
790789
peer->state = (osc_rdma_counter_t) ((uintptr_t) state_region->base + state_base + module->state_size * i);
791-
if (i > 0) {
792-
peer->state_endpoint = local_leader->state_endpoint;
793-
peer->state_btl_index = local_leader->state_btl_index;
794-
}
790+
peer->state_endpoint = local_leader->data_endpoint; // data_endpoint initialized in ompi_osc_rdma_new_peer();
791+
peer->state_btl_index = local_leader->data_btl_index;
795792
}
796793

797794
if (my_rank == peer_rank) {
@@ -914,10 +911,8 @@ static void ompi_osc_rdma_ensure_local_add_procs (void)
914911
static int ompi_osc_rdma_query_alternate_btls (ompi_communicator_t *comm, ompi_osc_rdma_module_t *module)
915912
{
916913
mca_btl_base_selected_module_t *item;
917-
char **btls_to_use = opal_argv_split (ompi_osc_rdma_btl_alternate_names, ',');
918914
int btls_found = 0;
919-
920-
btls_to_use = opal_argv_split (ompi_osc_rdma_btl_alternate_names, ',');
915+
char **btls_to_use = opal_argv_split (ompi_osc_rdma_btl_alternate_names, ',');
921916
if (NULL == btls_to_use) {
922917
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "no alternate BTLs requested: %s", ompi_osc_rdma_btl_alternate_names);
923918
return OMPI_ERR_UNREACH;

ompi/mca/osc/rdma/osc_rdma_lock.h

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -89,10 +89,7 @@ static inline int ompi_osc_rdma_btl_fop (ompi_osc_rdma_module_t *module, uint8_t
8989
*result = ((int64_t *) pending_op->op_buffer)[0];
9090
ret = OMPI_SUCCESS;
9191
ompi_osc_rdma_atomic_complete (selected_btl, endpoint, pending_op->op_buffer,
92-
pending_op->op_frag->handle, (void *) pending_op, NULL, OPAL_SUCCESS);
93-
} else {
94-
/* need to release here because ompi_osc_rdma_atomic_complete was not called */
95-
OBJ_RELEASE(pending_op);
92+
NULL, (void *) pending_op, NULL, OPAL_SUCCESS);
9693
}
9794
} else if (wait_for_completion) {
9895
while (!pending_op->op_complete) {
@@ -227,8 +224,6 @@ static inline int ompi_osc_rdma_btl_cswap (ompi_osc_rdma_module_t *module, uint8
227224
ret = OMPI_SUCCESS;
228225
}
229226

230-
/* need to release here because ompi_osc_rdma_atomic_complete was not called */
231-
OBJ_RELEASE(pending_op);
232227
} else {
233228
while (!pending_op->op_complete) {
234229
ompi_osc_rdma_progress (module);

0 commit comments

Comments
 (0)