Skip to content

Commit ac93b4f

Browse files
committed
osc/rdma: Fix race in attachment/location related to MPI_Win_attach().
In some cases proc A looks up a region belonging to proc B before B has finished attaching it, so A gives up too soon and returns OMPI_ERR_RMA_RANGE. This patch adds a bounded, configurable retry (MCA variable dyn_win_attach_retry_count, default 50) so that A keeps searching for the region instead of failing on the first miss. Signed-off-by: Austen Lauria <awlauria@us.ibm.com>
1 parent d9a5f0f commit ac93b4f

File tree

3 files changed

+36
-14
lines changed

3 files changed

+36
-14
lines changed

ompi/mca/osc/rdma/osc_rdma.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ struct ompi_osc_rdma_component_t {
111111

112112
/** maximum count for network AMO usage */
113113
unsigned long network_amo_max_count;
114+
unsigned long dyn_win_attach_retry_count;
114115
};
115116
typedef struct ompi_osc_rdma_component_t ompi_osc_rdma_component_t;
116117

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,15 @@ static int ompi_osc_rdma_component_register (void)
289289
MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, 0, 0, OPAL_INFO_LVL_3,
290290
MCA_BASE_VAR_SCOPE_LOCAL, &mca_osc_rdma_component.network_amo_max_count);
291291

292+
mca_osc_rdma_component.dyn_win_attach_retry_count = 50;
293+
(void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "dyn_win_attach_retry_count",
294+
"Number of times to retry searching for a dynamic window. This"
295+
"is an attempt to prevent failures when a window did not attach"
296+
"as quickly as expected. If an app is frequently failing with OMPI_ERR_RMA_RANGE"
297+
"when using dynamic windows, increasing this value may help. (default: 50)",
298+
MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, 0, 0, OPAL_INFO_LVL_3,
299+
MCA_BASE_VAR_SCOPE_LOCAL, &mca_osc_rdma_component.dyn_win_attach_retry_count);
300+
292301
/* register performance variables */
293302

294303
(void) mca_base_component_pvar_register (&mca_osc_rdma_component.super.osc_version, "put_retry_count",
@@ -305,6 +314,7 @@ static int ompi_osc_rdma_component_register (void)
305314
ompi_osc_rdma_pvar_read, NULL, NULL,
306315
(void *) (intptr_t) offsetof (ompi_osc_rdma_module_t, get_retry_count));
307316

317+
308318
return OMPI_SUCCESS;
309319
}
310320

ompi/mca/osc/rdma/osc_rdma_dynamic.c

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -460,27 +460,38 @@ int ompi_osc_rdma_find_dynamic_region (ompi_osc_rdma_module_t *module, ompi_osc_
460460
ompi_osc_rdma_peer_dynamic_t *dy_peer = (ompi_osc_rdma_peer_dynamic_t *) peer;
461461
intptr_t bound = (intptr_t) base + len;
462462
ompi_osc_rdma_region_t *regions;
463-
int ret, region_count;
463+
int ret, region_count = 0;
464+
ompi_osc_rdma_state_t *peer_state = NULL;
464465

465466
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "locating dynamic memory region matching: {%" PRIx64 ", %" PRIx64 "}"
466467
" (len %lu)", base, base + len, (unsigned long) len);
467468

468-
if (!ompi_osc_rdma_peer_local_state (peer)) {
469-
ret = ompi_osc_rdma_refresh_dynamic_region (module, dy_peer);
470-
if (OMPI_SUCCESS != ret) {
471-
return ret;
469+
unsigned long num_retries = mca_osc_rdma_component.dyn_win_attach_retry_count;
470+
do {
471+
if (!ompi_osc_rdma_peer_local_state (peer)) {
472+
ret = ompi_osc_rdma_refresh_dynamic_region (module, dy_peer);
473+
if (OMPI_SUCCESS != ret) {
474+
return ret;
475+
}
476+
regions = dy_peer->regions;
477+
region_count = dy_peer->region_count;
478+
} else {
479+
peer_state = (ompi_osc_rdma_state_t *) peer->state;
480+
regions = (ompi_osc_rdma_region_t *) peer_state->regions;
481+
region_count = peer_state->region_count;
472482
}
473483

474-
regions = dy_peer->regions;
475-
region_count = dy_peer->region_count;
476-
} else {
477-
ompi_osc_rdma_state_t *peer_state = (ompi_osc_rdma_state_t *) peer->state;
478-
regions = (ompi_osc_rdma_region_t *) peer_state->regions;
479-
region_count = peer_state->region_count;
480-
}
484+
if(region_count > 0) {
485+
*region = ompi_osc_rdma_find_region_containing (regions, 0, region_count - 1, (intptr_t) base,
486+
bound, module->region_size, NULL);
487+
}
488+
--num_retries;
489+
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "locating dynamic memory region matching: {%" PRIx64 ", %" PRIx64 "}"
490+
" (len %lu) (attempts left = %lu)", base, base + len, (unsigned long) len, num_retries);
491+
opal_atomic_wmb ();
492+
} while (!*region && (num_retries > 0));
481493

482-
*region = ompi_osc_rdma_find_region_containing (regions, 0, region_count - 1, (intptr_t) base, bound, module->region_size, NULL);
483-
if (!*region) {
494+
if(!*region) {
484495
return OMPI_ERR_RMA_RANGE;
485496
}
486497

0 commit comments

Comments
 (0)