Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions ompi/mca/osc/rdma/osc_rdma_accumulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,9 @@ static int ompi_osc_rdma_fetch_and_op_cas (ompi_osc_rdma_sync_t *sync, const voi
new_value = old_value;

if (&ompi_mpi_op_replace.op == op) {
memcpy ((void *)((intptr_t) &new_value + offset), origin_addr + dt->super.true_lb, extent);
memcpy ((void *)((intptr_t) &new_value + offset), (void *)((intptr_t) origin_addr + dt->super.true_lb), extent);
} else if (&ompi_mpi_op_no_op.op != op) {
ompi_op_reduce (op, (void *) origin_addr + dt->super.true_lb, (void*)((intptr_t) &new_value + offset), 1, dt);
ompi_op_reduce (op, (void *) ((intptr_t) origin_addr + dt->super.true_lb), (void*)((intptr_t) &new_value + offset), 1, dt);
}

ret = ompi_osc_rdma_btl_cswap (module, peer->data_endpoint, address, target_handle,
Expand All @@ -219,7 +219,7 @@ static int ompi_osc_rdma_acc_single_atomic (ompi_osc_rdma_sync_t *sync, const vo
{
ompi_osc_rdma_module_t *module = sync->module;
int32_t atomic_flags = module->selected_btl->btl_atomic_flags;
int ret, btl_op, flags;
int btl_op, flags;
int64_t origin;

if (!(module->selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS)) {
Expand Down Expand Up @@ -261,7 +261,6 @@ static inline int ompi_osc_rdma_gacc_amo (ompi_osc_rdma_module_t *module, ompi_o
const bool use_amo = module->acc_use_amo;
const size_t dt_size = datatype->super.size;
void *to_free = NULL;
uint64_t offset = 0;
int ret;

OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "using network atomics for accumulate operation with count %d", count);
Expand Down Expand Up @@ -339,7 +338,7 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
/* if the datatype is small enough (and the count is 1) then try to directly use the hardware to execute
* the atomic operation. this should be safe in all cases as either 1) the user has assured us they will
* never use atomics with count > 1, 2) we have the accumulate lock, or 3) we have an exclusive lock */
if (target_datatype->super.size <= 8 && target_count <= module->network_amo_max_count) {
if ((target_datatype->super.size <= 8) && (((unsigned long) target_count) <= module->network_amo_max_count)) {
ret = ompi_osc_rdma_gacc_amo (module, sync, source, result, result_count, result_datatype, result_convertor,
peer, target_address, target_handle, target_count, target_datatype, op, request);
if (OPAL_LIKELY(OMPI_SUCCESS == ret)) {
Expand Down Expand Up @@ -867,7 +866,7 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_win_t *win, const void *origin_
ompi_osc_rdma_module_t *module = GET_MODULE(win);
mca_btl_base_registration_handle_t *target_handle;
uint64_t target_address;
ptrdiff_t lb, target_lb, target_span;
ptrdiff_t target_lb, target_span;
ompi_osc_rdma_request_t *rdma_request = NULL;
bool lock_acquired = false;
ompi_osc_rdma_sync_t *sync;
Expand Down
39 changes: 22 additions & 17 deletions ompi/mca/osc/rdma/osc_rdma_dynamic.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
ompi_osc_rdma_handle_t *rdma_region_handle;
osc_rdma_counter_t region_count;
osc_rdma_counter_t region_id;
intptr_t bound, aligned_base, aligned_bound;
intptr_t aligned_base, aligned_bound;
intptr_t page_size = opal_getpagesize ();
int region_index, ret;
size_t aligned_len;
Expand All @@ -185,6 +185,7 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach: %s, %p, %lu", win->w_name, base, (unsigned long) len);

OPAL_THREAD_LOCK(&module->lock);
ompi_osc_rdma_lock_acquire_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));

region_count = module->state->region_count & 0xffffffffL;
region_id = module->state->region_count >> 32;
Expand All @@ -197,7 +198,6 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)

/* it is wasteful to register less than a page. this may allow the remote side to access more
* memory but the MPI standard covers this with calling the calling behavior erroneous */
bound = (intptr_t) base + len;
aligned_bound = OPAL_ALIGN((intptr_t) base + len, page_size, intptr_t);
aligned_base = (intptr_t) base & ~(page_size - 1);
aligned_len = (size_t)(aligned_bound - aligned_base);
Expand All @@ -209,16 +209,11 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
/* validates that the region does not overlap with an existing region even if they are on the same page */
ret = ompi_osc_rdma_add_attachment (module->dynamic_handles[region_index], (intptr_t) base, len);
OPAL_THREAD_UNLOCK(&module->lock);
ompi_osc_rdma_lock_release_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
/* no need to invalidate remote caches */
return ret;
}

/* region is in flux */
module->state->region_count = -1;
opal_atomic_wmb ();

ompi_osc_rdma_lock_acquire_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));

/* do a binary seach for where the region should be inserted */
if (region_count) {
region = find_insertion_point ((ompi_osc_rdma_region_t *) module->state->regions, 0, region_count - 1,
Expand All @@ -239,7 +234,7 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
region->len = aligned_len;

OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "attaching dynamic memory region {%p, %p} aligned {%p, %p}, at index %d",
base, (void *) bound, (void *) aligned_base, (void *) aligned_bound, region_index);
base, (void *) ((intptr_t) base + len), (void *) aligned_base, (void *) aligned_bound, region_index);

/* add RDMA region handle to track this region */
rdma_region_handle = OBJ_NEW(ompi_osc_rdma_handle_t);
Expand All @@ -253,6 +248,7 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
OPAL_THREAD_UNLOCK(&module->lock);
OBJ_RELEASE(rdma_region_handle);
ompi_osc_rdma_lock_release_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
return OMPI_ERR_RMA_ATTACH;
}

Expand All @@ -275,9 +271,9 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
}
#endif

opal_atomic_mb ();
/* the region state has changed */
module->state->region_count = ((region_id + 1) << 32) | (region_count + 1);
opal_atomic_wmb ();

ompi_osc_rdma_lock_release_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
OPAL_THREAD_UNLOCK(&module->lock);
Expand All @@ -304,6 +300,9 @@ int ompi_osc_rdma_detach (struct ompi_win_t *win, const void *base)

OPAL_THREAD_LOCK(&module->lock);

/* lock the region so it can't change while a peer is reading it */
ompi_osc_rdma_lock_acquire_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));

/* the upper 4 bytes of the region count are an instance counter */
region_count = module->state->region_count & 0xffffffffL;
region_id = module->state->region_count >> 32;
Expand All @@ -327,23 +326,22 @@ int ompi_osc_rdma_detach (struct ompi_win_t *win, const void *base)
if (region_index == region_count) {
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "could not find dynamic memory attachment for %p", base);
OPAL_THREAD_UNLOCK(&module->lock);
ompi_osc_rdma_lock_release_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));
return OMPI_ERR_BASE;
}

if (!opal_list_is_empty (&rdma_region_handle->attachments)) {
/* another region is referencing this attachment */
OPAL_THREAD_UNLOCK(&module->lock);
ompi_osc_rdma_lock_release_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));
return OMPI_SUCCESS;
}

/* lock the region so it can't change while a peer is reading it */
ompi_osc_rdma_lock_acquire_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));

OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "detaching dynamic memory region {%p, %p} from index %d",
base, (void *)((intptr_t) base + region->len), region_index);

if (module->selected_btl->btl_register_mem) {
ompi_osc_rdma_deregister (module, rdma_region_handle->btl_handle);

}

if (region_index < region_count - 1) {
Expand All @@ -358,6 +356,7 @@ int ompi_osc_rdma_detach (struct ompi_win_t *win, const void *base)
module->dynamic_handles[region_count - 1] = NULL;

module->state->region_count = ((region_id + 1) << 32) | (region_count - 1);
opal_atomic_wmb ();

ompi_osc_rdma_lock_release_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));

Expand Down Expand Up @@ -461,14 +460,18 @@ int ompi_osc_rdma_find_dynamic_region (ompi_osc_rdma_module_t *module, ompi_osc_
ompi_osc_rdma_peer_dynamic_t *dy_peer = (ompi_osc_rdma_peer_dynamic_t *) peer;
intptr_t bound = (intptr_t) base + len;
ompi_osc_rdma_region_t *regions;
int ret, region_count;
int ret = OMPI_SUCCESS, region_count;

OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "locating dynamic memory region matching: {%" PRIx64 ", %" PRIx64 "}"
" (len %lu)", base, base + len, (unsigned long) len);

OPAL_THREAD_LOCK(&module->lock);
// Make sure region isn't being touched.
ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
if (!ompi_osc_rdma_peer_local_state (peer)) {
ret = ompi_osc_rdma_refresh_dynamic_region (module, dy_peer);
if (OMPI_SUCCESS != ret) {
ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
return ret;
}

Expand All @@ -482,9 +485,11 @@ int ompi_osc_rdma_find_dynamic_region (ompi_osc_rdma_module_t *module, ompi_osc_

*region = ompi_osc_rdma_find_region_containing (regions, 0, region_count - 1, (intptr_t) base, bound, module->region_size, NULL);
if (!*region) {
return OMPI_ERR_RMA_RANGE;
ret = OMPI_ERR_RMA_RANGE;
}
OPAL_THREAD_UNLOCK(&module->lock);
ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, regions_lock));

/* round a matching region */
return OMPI_SUCCESS;
return ret;
}