Closed
Description
@hjelmn can you please have a look at this?
Here is a reproducer:
#include <mpi.h>
/*
 * Minimal reproducer: allocates a one-double window on MPI_COMM_SELF,
 * opens an epoch on it with MPI_Win_lock_all, and accumulates a single
 * double (value 0) into target rank 0 of that window before flushing,
 * unlocking and freeing it.
 *
 * Return codes of the MPI calls are not inspected.
 */
int main(int argc, char* argv[])
{
    MPI_Win win;
    double *base;       /* receives the window base address from MPI_Win_allocate */
    double addend = 0;  /* origin buffer for the accumulate */

    MPI_Init(&argc, &argv);

    /* One element of sizeof(double) bytes, displacement unit 1. */
    MPI_Win_allocate(sizeof(double), 1, MPI_INFO_NULL, MPI_COMM_SELF, (void *)&base, &win);

    MPI_Win_lock_all(MPI_MODE_NOCHECK, win);

    /* Zero the window contents, then add `addend` to it via MPI_SUM. */
    base[0] = 0.;
    MPI_Accumulate(&addend, 1, MPI_DOUBLE, 0, 0, 1, MPI_DOUBLE, MPI_SUM, win);

    MPI_Win_flush_all(win);
    MPI_Win_unlock_all(win);

    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}
This can be run with only one MPI task.
It works fine with `--mca osc sm` on both `v1.10` and `master`,
but with `--mca osc pt2pt` it works fine on `v1.10` and hangs on `master`.
I ran this under the debugger, and ended up writing this patch so that `master` mimics `v1.10`.
That being said, I have no idea whether this is correct or not...
diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c
index 7e28914..34df3ab 100644
--- a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c
+++ b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c
@@ -2,6 +2,8 @@
/*
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
+ * Copyright (c) 2016 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -17,6 +19,7 @@ static void ompi_osc_pt2pt_sync_constructor (ompi_osc_pt2pt_sync_t *sync)
sync->type = OMPI_OSC_PT2PT_SYNC_TYPE_NONE;
sync->eager_send_active = false;
sync->epoch_active = false;
+ sync->sync.pscw.group = NULL;
OBJ_CONSTRUCT(&sync->lock, opal_mutex_t);
OBJ_CONSTRUCT(&sync->cond, opal_condition_t);
}
diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h
index eee2964..cfed4e9 100644
--- a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h
+++ b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.h
@@ -2,6 +2,8 @@
/*
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
+ * Copyright (c) 2016 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -45,7 +47,7 @@ struct ompi_osc_pt2pt_sync_t {
ompi_osc_pt2pt_sync_type_t type;
/** synchronization data */
- union {
+ struct {
/** lock specific synchronization data */
struct {
/** lock target rank (-1 for all) */
@@ -129,13 +131,15 @@ bool ompi_osc_pt2pt_sync_pscw_peer (struct ompi_osc_pt2pt_module_t *module, int
*/
static inline void ompi_osc_pt2pt_sync_wait (ompi_osc_pt2pt_sync_t *sync)
{
- OPAL_THREAD_LOCK(&sync->lock);
- while (!sync->eager_send_active) {
- OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
- "waiting for access epoch to start"));
- opal_condition_wait(&sync->cond, &sync->lock);
+ if (sync->sync.pscw.group) {
+ OPAL_THREAD_LOCK(&sync->lock);
+ while (!sync->eager_send_active) {
+ OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
+ "waiting for access epoch to start"));
+ opal_condition_wait(&sync->cond, &sync->lock);
+ }
+ OPAL_THREAD_UNLOCK(&sync->lock);
}
- OPAL_THREAD_UNLOCK(&sync->lock);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
"access epoch ready"));