
Commit 92ac56b

ompi/group: fix race condition in group operations
In group operations (e.g., MPI_Group_union) each proc pointer in one group is compared against the proc pointers of the other group to check for equality. If, while the inner loop is running, another thread of the same process updates a proc pointer (because of sentinels, the same proc can be represented by two different pointers), the comparison produces inconsistent or erroneous results. So take the proc lock in group operations to avoid this race when comparing proc pointers. To avoid recursive locking on the ompi_group_peer_lookup path, pass a flags argument instead of a bool: if the `OMPI_GROUP_LOCK` bit is set the proc lock is taken, otherwise the lock is skipped. Signed-off-by: Aboorva Devarajan <abodevar@in.ibm.com>
1 parent a53326c commit 92ac56b
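For readers who want the shape of the fix without walking the diff, here is a minimal, self-contained C sketch of the pattern this commit introduces. Everything in it is a stand-in written for illustration: proc_t, group_t, proc_table, proc_for_rank_locked, proc_for_rank_nolock, and groups_identical are hypothetical names, and a plain pthread mutex stands in for the opal mutex. Only the ideas come from the patch: the OMPI_GROUP_ALLOCATE / OMPI_GROUP_LOCK flag bits, a no-lock lookup variant for use while the proc lock is already held, and a group operation that holds the lock across its whole comparison loop.

/* Hedged sketch of the flag-gated lookup this commit adds; all types and
 * helpers here are simplified stand-ins, not the actual Open MPI code. */
#include <pthread.h>
#include <stddef.h>

#define OMPI_GROUP_ALLOCATE 0x01   /* translate sentinels into real procs */
#define OMPI_GROUP_LOCK     0x02   /* take the proc lock while translating */

typedef struct { int name; } proc_t;                  /* stand-in for ompi_proc_t */
typedef struct { proc_t *procs[4]; int n; } group_t;  /* stand-in for ompi_group_t */

static pthread_mutex_t proc_lock = PTHREAD_MUTEX_INITIALIZER;  /* stand-in for ompi_proc_lock */
static proc_t proc_table[4];                                   /* "real" procs keyed by rank */

/* NULL plays the sentinel role in this sketch. */
static int is_sentinel(const proc_t *p) { return NULL == p; }

/* Locking variant: for callers that do not already hold the proc lock. */
static proc_t *proc_for_rank_locked(int rank)
{
    pthread_mutex_lock(&proc_lock);
    proc_t *p = &proc_table[rank];
    pthread_mutex_unlock(&proc_lock);
    return p;
}

/* No-lock variant: the caller must already hold the proc lock. */
static proc_t *proc_for_rank_nolock(int rank)
{
    return &proc_table[rank];
}

static proc_t *dense_lookup(group_t *g, int rank, unsigned char flags)
{
    proc_t *p = g->procs[rank];
    if (!is_sentinel(p)) {
        return p;
    }
    if (0 == flags) {          /* caller only wants already-materialized procs */
        return NULL;
    }
    /* OMPI_GROUP_LOCK selects the locking translation path; leaving it unset
     * avoids recursive locking when the group operation already holds the lock. */
    return (flags & OMPI_GROUP_LOCK) ? proc_for_rank_locked(rank)
                                     : proc_for_rank_nolock(rank);
}

/* A group operation holds the lock across its whole comparison so a concurrent
 * sentinel replacement cannot change a proc pointer mid-loop. */
static int groups_identical(group_t *a, group_t *b)
{
    int same = (a->n == b->n);
    pthread_mutex_lock(&proc_lock);
    for (int i = 0; same && i < a->n; ++i) {
        same = (dense_lookup(a, i, OMPI_GROUP_ALLOCATE) ==
                dense_lookup(b, i, OMPI_GROUP_ALLOCATE));
    }
    pthread_mutex_unlock(&proc_lock);
    return same;
}

In the actual change, ompi_group_compare, ompi_group_union, and ompi_group_difference take opal_mutex_lock (&ompi_proc_lock) and call ompi_group_peer_lookup_nolock inside their loops, while ompi_group_peer_lookup keeps the locking path via ompi_proc_for_name.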

File tree: 7 files changed (+50 / -31 lines)


ompi/group/group.c

Lines changed: 6 additions & 4 deletions
@@ -495,7 +495,7 @@ int ompi_group_compare(ompi_group_t *group1,
     bool similar, identical;
     ompi_group_t *group1_pointer, *group2_pointer;
     ompi_proc_t *proc1_pointer, *proc2_pointer;
-
+    opal_mutex_lock (&ompi_proc_lock);
     /* check for same groups */
     if( group1 == group2 ) {
         *result=MPI_IDENT;
@@ -505,6 +505,7 @@ int ompi_group_compare(ompi_group_t *group1,
     /* check to see if either is MPI_GROUP_NULL or MPI_GROUP_EMPTY */
     if( ( MPI_GROUP_EMPTY == group1 ) || ( MPI_GROUP_EMPTY == group2 ) ) {
         *result=MPI_UNEQUAL;
+        opal_mutex_unlock (&ompi_proc_lock);
         return return_value;
     }

@@ -516,6 +517,7 @@ int ompi_group_compare(ompi_group_t *group1,
     if( group1_pointer->grp_proc_count != group2_pointer->grp_proc_count ) {
         /* if not same size - return */
         *result=MPI_UNEQUAL;
+        opal_mutex_unlock (&ompi_proc_lock);
         return return_value;
     }

@@ -524,11 +526,11 @@ int ompi_group_compare(ompi_group_t *group1,
     similar=true;
     identical=true;
     for(proc1=0 ; proc1 < group1_pointer->grp_proc_count ; proc1++ ) {
-        proc1_pointer= ompi_group_peer_lookup(group1_pointer,proc1);
+        proc1_pointer= ompi_group_peer_lookup_nolock(group1_pointer,proc1);
         /* loop over group2 processes to find "match" */
         match=-1;
         for(proc2=0 ; proc2 < group2_pointer->grp_proc_count ; proc2++ ) {
-            proc2_pointer=ompi_group_peer_lookup(group2_pointer,proc2);
+            proc2_pointer=ompi_group_peer_lookup_nolock(group2_pointer,proc2);
             if( proc1_pointer == proc2_pointer ) {
                 if(proc1 != proc2 ) {
                     identical=false;
@@ -552,7 +554,7 @@ int ompi_group_compare(ompi_group_t *group1,
     } else {
         *result=MPI_UNEQUAL;
     }
-
+    opal_mutex_unlock (&ompi_proc_lock);
     return return_value;
 }


ompi/group/group.h

Lines changed: 25 additions & 14 deletions
@@ -68,6 +68,9 @@ struct ompi_group_bitmap_data_t
     int grp_bitmap_array_len;    /* length of the bit array */
 };

+#define OMPI_GROUP_ALLOCATE 0x01
+#define OMPI_GROUP_LOCK     0x02
+
 /**
  * Group structure
  * Currently we have four formats for storing the process pointers that are members
@@ -337,15 +340,14 @@ int ompi_group_minloc (int list[], int length);
  * @brief Helper function for retreiving the proc of a group member in a dense group
  *
  * This function exists to handle the translation of sentinel group members to real
- * ompi_proc_t's. If a sentinel value is found and allocate is true then this function
+ * ompi_proc_t's. If a sentinel value is found and allocate bit is true then this function
  * looks for an existing ompi_proc_t using ompi_proc_for_name which will allocate a
- * ompi_proc_t if one does not exist. If allocate is false then sentinel values translate
+ * ompi_proc_t if one does not exist. If allocate bit is false then sentinel values translate
  * to NULL.
  */
-static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group, const int peer_id, const bool allocate)
+static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group, const int peer_id, unsigned char flags)
 {
-    ompi_proc_t *proc;
-
+    ompi_proc_t *proc, *real_proc;
 #if OPAL_ENABLE_DEBUG
     if (peer_id >= group->grp_proc_count) {
         opal_output(0, "ompi_group_dense_lookup: invalid peer index (%d)", peer_id);
@@ -356,13 +358,17 @@ static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group,
     proc = group->grp_proc_pointers[peer_id];

     if (OPAL_UNLIKELY(ompi_proc_is_sentinel (proc))) {
-        if (!allocate) {
+        if (flags == 0) {
             return NULL;
         }
-
         /* replace sentinel value with an actual ompi_proc_t */
-        ompi_proc_t *real_proc =
-            (ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((uintptr_t) proc));
+        if ((flags & OMPI_GROUP_LOCK) == OMPI_GROUP_LOCK) {
+            real_proc =
+                (ompi_proc_t *) ompi_proc_for_name (ompi_proc_sentinel_to_name ((uintptr_t) proc));
+        } else {
+            real_proc =
+                (ompi_proc_t *) ompi_proc_for_name_nolock (ompi_proc_sentinel_to_name ((uintptr_t) proc));
+        }

         if (opal_atomic_compare_exchange_strong_ptr ((opal_atomic_intptr_t *)(group->grp_proc_pointers + peer_id),
                                                      (intptr_t *) &proc, (intptr_t) real_proc)) {
@@ -379,19 +385,19 @@ static inline struct ompi_proc_t *ompi_group_dense_lookup (ompi_group_t *group,
  * This is the function that iterates through the sparse groups to the dense group
  * to reach the process pointer
  */
-static inline ompi_proc_t *ompi_group_get_proc_ptr (ompi_group_t *group, int rank, const bool allocate)
+static inline ompi_proc_t *ompi_group_get_proc_ptr (ompi_group_t *group, int rank, unsigned char flags)
 {
 #if OMPI_GROUP_SPARSE
     do {
         if (OMPI_GROUP_IS_DENSE(group)) {
-            return ompi_group_dense_lookup (group, rank, allocate);
+            return ompi_group_dense_lookup (group, rank, flags);
         }
         int ranks1 = rank;
         ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank);
         group = group->grp_parent_group_ptr;
     } while (1);
 #else
-    return ompi_group_dense_lookup (group, rank, allocate);
+    return ompi_group_dense_lookup (group, rank, flags);
 #endif
 }

@@ -420,12 +426,17 @@ static inline opal_process_name_t ompi_group_get_proc_name (ompi_group_t *group,
  */
 static inline struct ompi_proc_t* ompi_group_peer_lookup(ompi_group_t *group, int peer_id)
 {
-    return ompi_group_get_proc_ptr (group, peer_id, true);
+    return ompi_group_get_proc_ptr (group, peer_id, (unsigned char) (OMPI_GROUP_ALLOCATE | OMPI_GROUP_LOCK));
+}
+
+static inline struct ompi_proc_t* ompi_group_peer_lookup_nolock(ompi_group_t *group, int peer_id)
+{
+    return ompi_group_get_proc_ptr (group, peer_id, (unsigned char) OMPI_GROUP_ALLOCATE);
 }

 static inline struct ompi_proc_t *ompi_group_peer_lookup_existing (ompi_group_t *group, int peer_id)
 {
-    return ompi_group_get_proc_ptr (group, peer_id, false);
+    return ompi_group_get_proc_ptr (group, peer_id, (unsigned char) 0);
 }

 #if OPAL_ENABLE_FT_MPI

ompi/group/group_plist.c

Lines changed: 10 additions & 4 deletions
@@ -163,11 +163,12 @@ int ompi_group_union (ompi_group_t* group1, ompi_group_t* group2,
     if (OPAL_SUCCESS != rc) {
         return rc;
     }
-
+    opal_mutex_lock (&ompi_proc_lock);
     /* check group2 elements to see if they need to be included in the list */
     overlap_count = ompi_group_dense_overlap (group1, group2, &bitmap);
     if (0 > overlap_count) {
         OBJ_DESTRUCT(&bitmap);
+        opal_mutex_unlock (&ompi_proc_lock);
         return overlap_count;
     }

@@ -176,13 +177,15 @@ int ompi_group_union (ompi_group_t* group1, ompi_group_t* group2,
         *new_group = MPI_GROUP_EMPTY;
         OBJ_RETAIN(MPI_GROUP_EMPTY);
         OBJ_DESTRUCT(&bitmap);
+        opal_mutex_unlock (&ompi_proc_lock);
         return MPI_SUCCESS;
     }

     /* get new group struct */
     new_group_pointer = ompi_group_allocate(new_group_size);
     if (NULL == new_group_pointer) {
         OBJ_DESTRUCT(&bitmap);
+        opal_mutex_unlock (&ompi_proc_lock);
         return MPI_ERR_GROUP;
     }

@@ -218,7 +221,7 @@ int ompi_group_union (ompi_group_t* group1, ompi_group_t* group2,
     }

     *new_group = (MPI_Group) new_group_pointer;
-
+    opal_mutex_unlock (&ompi_proc_lock);
     return OMPI_SUCCESS;
 }

@@ -245,11 +248,12 @@ int ompi_group_difference(ompi_group_t* group1, ompi_group_t* group2,
     if (OPAL_SUCCESS != rc) {
         return rc;
     }
-
+    opal_mutex_lock (&ompi_proc_lock);
     /* check group2 elements to see if they need to be included in the list */
     overlap_count = ompi_group_dense_overlap (group2, group1, &bitmap);
     if (0 > overlap_count) {
         OBJ_DESTRUCT(&bitmap);
+        opal_mutex_unlock (&ompi_proc_lock);
         return overlap_count;
     }

@@ -258,13 +262,15 @@ int ompi_group_difference(ompi_group_t* group1, ompi_group_t* group2,
         *new_group = MPI_GROUP_EMPTY;
         OBJ_RETAIN(MPI_GROUP_EMPTY);
         OBJ_DESTRUCT(&bitmap);
+        opal_mutex_unlock (&ompi_proc_lock);
         return MPI_SUCCESS;
     }

     /* allocate a new ompi_group_t structure */
     new_group_pointer = ompi_group_allocate(new_group_size);
     if( NULL == new_group_pointer ) {
         OBJ_DESTRUCT(&bitmap);
+        opal_mutex_unlock (&ompi_proc_lock);
         return MPI_ERR_GROUP;
     }

@@ -292,6 +298,6 @@ int ompi_group_difference(ompi_group_t* group1, ompi_group_t* group2,
     }

     *new_group = (MPI_Group)new_group_pointer;
-
+    opal_mutex_unlock (&ompi_proc_lock);
     return OMPI_SUCCESS;
 }

ompi/mca/common/monitoring/common_monitoring.h

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ static inline int mca_common_monitoring_get_world_rank(int dest, ompi_group_t *g
     opal_process_name_t tmp;

     /* find the processor of the destination */
-    ompi_proc_t *proc = ompi_group_get_proc_ptr(group, dest, true);
+    ompi_proc_t *proc = ompi_group_get_proc_ptr(group, dest, OMPI_GROUP_ALLOCATE | OMPI_GROUP_LOCK);
     if( ompi_proc_is_sentinel(proc) ) {
         tmp = ompi_proc_sentinel_to_name((uintptr_t)proc);
     } else {

ompi/proc/proc.c

Lines changed: 2 additions & 4 deletions
@@ -48,14 +48,12 @@
 #include "ompi/mca/pml/pml.h"

 opal_list_t ompi_proc_list = {{0}};
-static opal_mutex_t ompi_proc_lock;
 static opal_hash_table_t ompi_proc_hash;
-
+opal_mutex_t ompi_proc_lock;
 ompi_proc_t* ompi_proc_local_proc = NULL;

 static void ompi_proc_construct(ompi_proc_t* proc);
 static void ompi_proc_destruct(ompi_proc_t* proc);
-static ompi_proc_t *ompi_proc_for_name_nolock (const opal_process_name_t proc_name);

 OBJ_CLASS_INSTANCE(
     ompi_proc_t,
@@ -191,7 +189,7 @@ opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name)
     return NULL;
 }

-static ompi_proc_t *ompi_proc_for_name_nolock (const opal_process_name_t proc_name)
+ompi_proc_t *ompi_proc_for_name_nolock (const opal_process_name_t proc_name)
 {
     ompi_proc_t *proc = NULL;
     int ret;

ompi/proc/proc.h

Lines changed: 2 additions & 2 deletions
@@ -100,7 +100,7 @@ OBJ_CLASS_DECLARATION(ompi_proc_t);
 */
 OMPI_DECLSPEC extern ompi_proc_t* ompi_proc_local_proc;
 OMPI_DECLSPEC extern opal_list_t ompi_proc_list;
-
+OMPI_DECLSPEC extern opal_mutex_t ompi_proc_lock;
 /* ******************************************************************** */


@@ -381,7 +381,7 @@ OMPI_DECLSPEC int ompi_proc_refresh(void);
 * the ompi_proc_t on a communicator.
 */
 OMPI_DECLSPEC opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name);
-
+OMPI_DECLSPEC ompi_proc_t *ompi_proc_for_name_nolock (const opal_process_name_t proc_name);

 OMPI_DECLSPEC opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name);


ompi/runtime/ompi_mpi_abort.c

Lines changed: 4 additions & 2 deletions
@@ -100,15 +100,17 @@ static void try_kill_peers(ompi_communicator_t *comm,
         } else {
             assert(count <= nprocs);
             procs[count++] =
-                *OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i, true)->super.proc_name);
+                *OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i,
+                                                             OMPI_GROUP_ALLOCATE | OMPI_GROUP_LOCK)->super.proc_name);
         }
     }

     /* if requested, kill off remote group procs too */
     for (i = 0; i < ompi_comm_remote_size(comm); ++i) {
         assert(count <= nprocs);
         procs[count++] =
-            *OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i, true)->super.proc_name);
+            *OMPI_CAST_RTE_NAME(&ompi_group_get_proc_ptr(comm->c_remote_group, i,
+                                                         OMPI_GROUP_ALLOCATE | OMPI_GROUP_LOCK)->super.proc_name);
     }

     if (nprocs > 0) {
