Skip to content

Commit e3ca132

Browse files
committed
Make sure opal_thread_start always spawns pthreads
Users of `opal_thread_start` (btl/tcp, ft, smcuda, progress thread) may spawn threads that block in functions unaware of argobots or qthreads (e.g., libevent or read(3)). If we spawn an argobot or qthread instead of a pthread, the thread executing that argobot or qthread (potentially the main thread) blocks, leading to a deadlock. Open MPI expects the semantics of a pthread for these threads, so all internal threads should be created as pthreads. Signed-off-by: Joseph Schuchart <schuchart@icl.utk.edu>
1 parent 38c2e37 commit e3ca132

File tree

9 files changed

+98
-222
lines changed

9 files changed

+98
-222
lines changed

opal/mca/threads/argobots/threads_argobots_module.c

Lines changed: 1 addition & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2005 The University of Tennessee and The University
6+
* Copyright (c) 2004-2021 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -33,78 +33,6 @@
3333
#include "opal/util/output.h"
3434
#include "opal/util/sys_limits.h"
3535

36-
/*
37-
* Constructor
38-
*/
39-
static void opal_thread_construct(opal_thread_t *t)
40-
{
41-
t->t_run = 0;
42-
t->t_handle = ABT_THREAD_NULL;
43-
}
44-
45-
/* Class instance for opal_thread_t: derives from opal_object_t, uses
 * opal_thread_construct as constructor and has no destructor (NULL). */
OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL);
46-
47-
static inline ABT_thread opal_thread_get_argobots_self(void)
48-
{
49-
ABT_thread self;
50-
ABT_thread_self(&self);
51-
return self;
52-
}
53-
54-
static void opal_thread_argobots_wrapper(void *arg)
55-
{
56-
opal_thread_t *t = (opal_thread_t *) arg;
57-
t->t_ret = ((void *(*) (void *) ) t->t_run)(t);
58-
}
59-
60-
opal_thread_t *opal_thread_get_self(void)
61-
{
62-
opal_threads_argobots_ensure_init();
63-
opal_thread_t *t = OBJ_NEW(opal_thread_t);
64-
t->t_handle = opal_thread_get_argobots_self();
65-
return t;
66-
}
67-
68-
/*
 * Tell whether the thread object t refers to the calling Argobots thread.
 */
bool opal_thread_self_compare(opal_thread_t *t)
{
    ABT_thread current;

    opal_threads_argobots_ensure_init();
    current = opal_thread_get_argobots_self();
    return current == t->t_handle;
}
73-
74-
/*
 * Release the Argobots thread behind t via ABT_thread_free() and hand
 * back the value its entry function returned.
 *
 * thr_return may be NULL if the caller is not interested in the value.
 * Returns OPAL_SUCCESS if ABT_thread_free() succeeded, OPAL_ERROR otherwise;
 * the handle is reset to ABT_THREAD_NULL in both cases.
 */
int opal_thread_join(opal_thread_t *t, void **thr_return)
{
    const int abt_rc = ABT_thread_free(&t->t_handle);

    if (NULL != thr_return) {
        *thr_return = t->t_ret;
    }
    t->t_handle = ABT_THREAD_NULL;

    if (ABT_SUCCESS != abt_rc) {
        return OPAL_ERROR;
    }
    return OPAL_SUCCESS;
}
83-
84-
/*
 * No-op in this implementation: nothing is recorded about the main thread.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * proper prototype, matching the other threading back ends.
 */
void opal_thread_set_main(void)
{
}
87-
88-
/*
 * Start t as an Argobots thread on the execution stream of the caller.
 *
 * In debug builds the call is rejected with OPAL_ERR_BAD_PARAM when no
 * entry function is set or a thread is already attached to t.
 * Returns OPAL_SUCCESS if the thread was created, OPAL_ERROR otherwise.
 */
int opal_thread_start(opal_thread_t *t)
{
    ABT_xstream current_xstream;
    int abt_rc;

    opal_threads_argobots_ensure_init();

    if (OPAL_ENABLE_DEBUG && (NULL == t->t_run || ABT_THREAD_NULL != t->t_handle)) {
        return OPAL_ERR_BAD_PARAM;
    }

    ABT_xstream_self(&current_xstream);
    abt_rc = ABT_thread_create_on_xstream(current_xstream, opal_thread_argobots_wrapper, t,
                                          ABT_THREAD_ATTR_NULL, &t->t_handle);
    if (ABT_SUCCESS != abt_rc) {
        return OPAL_ERROR;
    }
    return OPAL_SUCCESS;
}
105-
106-
/* Declaration of the opal_thread_t class object. */
OBJ_CLASS_DECLARATION(opal_thread_t);
107-
10836
int opal_tsd_key_create(opal_tsd_key_t *key, opal_tsd_destructor_t destructor)
10937
{
11038
opal_threads_argobots_ensure_init();

opal/mca/threads/argobots/threads_argobots_threads.h

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2005 The University of Tennessee and The University
6+
* Copyright (c) 2004-2021 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
@@ -27,15 +27,6 @@
2727
#define OPAL_MCA_THREADS_ARGOBOTS_THREADS_ARGOBOTS_THREADS_H
2828

2929
#include "opal/mca/threads/argobots/threads_argobots.h"
30-
#include <signal.h>
31-
32-
struct opal_thread_t {
33-
opal_object_t super;
34-
opal_thread_fn_t t_run;
35-
void *t_arg;
36-
ABT_thread t_handle;
37-
void *t_ret;
38-
};
3930

4031
/* Argobots are cooperatively scheduled so yield when idle */
4132
#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true

opal/mca/threads/base/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ headers += \
2424

2525
libmca_threads_la_SOURCES += \
2626
base/mutex.c \
27+
base/create_join.c \
2728
base/threads_base.c \
2829
base/tsd.c \
2930
base/wait_sync.c

opal/mca/threads/base/create_join.c

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2+
/*
3+
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4+
* University Research and Technology
5+
* Corporation. All rights reserved.
6+
* Copyright (c) 2004-2021 The University of Tennessee and The University
7+
* of Tennessee Research Foundation. All rights
8+
* reserved.
9+
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10+
* University of Stuttgart. All rights reserved.
11+
* Copyright (c) 2004-2005 The Regents of the University of California.
12+
* All rights reserved.
13+
* Copyright (c) 2007-2018 Los Alamos National Security, LLC. All rights
14+
* reserved.
15+
* Copyright (c) 2015-2016 Research Organization for Information Science
16+
* and Technology (RIST). All rights reserved.
17+
* Copyright (c) 2019 Sandia National Laboratories. All rights reserved.
18+
*
19+
* $COPYRIGHT$
20+
*
21+
* Additional copyrights may follow
22+
*
23+
* $HEADER$
24+
*/
25+
26+
#include <errno.h>
#include <pthread.h>
#include <unistd.h>

#include "opal/constants.h"
#include "opal/mca/threads/threads.h"
#include "opal/mca/threads/tsd.h"
#include "opal/prefetch.h"
#include "opal/util/output.h"
#include "opal/util/sys_limits.h"
35+
36+
/*
37+
* Constructor
38+
*/
39+
static void opal_thread_construct(opal_thread_t *t)
40+
{
41+
t->t_run = 0;
42+
t->t_handle = (pthread_t) -1;
43+
}
44+
45+
/* Class instance for opal_thread_t: derives from opal_object_t, uses
 * opal_thread_construct as constructor and has no destructor (NULL). */
OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL);
46+
47+
/*
 * Start t as a pthread running t->t_run with the thread object itself
 * as argument.
 *
 * In debug builds the call is rejected with OPAL_ERR_BAD_PARAM when no
 * entry function is set or the handle does not carry the "no thread"
 * marker. Returns OPAL_SUCCESS on success; on failure returns
 * OPAL_ERR_IN_ERRNO with errno set to the pthread_create() error number.
 */
int opal_thread_start(opal_thread_t *t)
{
    int rc;

    if (OPAL_ENABLE_DEBUG) {
        if (NULL == t->t_run || (pthread_t) -1 != t->t_handle) {
            return OPAL_ERR_BAD_PARAM;
        }
    }

    rc = pthread_create(&t->t_handle, NULL, (void *(*) (void *) ) t->t_run, t);
    if (0 != rc) {
        /* pthread_create() reports errors through its return value, not
         * errno, and leaves the handle undefined on failure. Restore the
         * "no thread" marker and publish the error code where
         * OPAL_ERR_IN_ERRNO tells callers to look for it. */
        t->t_handle = (pthread_t) -1;
        errno = rc;
        return OPAL_ERR_IN_ERRNO;
    }

    return OPAL_SUCCESS;
}
61+
62+
/*
 * Join the pthread behind t.
 *
 * thr_return is passed straight to pthread_join() and may be NULL.
 * The handle is reset to the "no thread" marker whether or not the join
 * succeeded. Returns OPAL_SUCCESS on success; on failure returns
 * OPAL_ERR_IN_ERRNO with errno set to the pthread_join() error number.
 */
int opal_thread_join(opal_thread_t *t, void **thr_return)
{
    int rc = pthread_join(t->t_handle, thr_return);

    t->t_handle = (pthread_t) -1;
    if (0 != rc) {
        /* pthread_join() returns the error number instead of setting errno. */
        errno = rc;
        return OPAL_ERR_IN_ERRNO;
    }

    return OPAL_SUCCESS;
}
68+
69+
/*
 * Tell whether the thread object t refers to the calling pthread.
 *
 * pthread_t is an opaque type, so the comparison goes through
 * pthread_equal() rather than the == operator.
 */
bool opal_thread_self_compare(opal_thread_t *t)
{
    return 0 != pthread_equal(pthread_self(), t->t_handle);
}
73+
74+
opal_thread_t *opal_thread_get_self(void)
75+
{
76+
opal_thread_t *t = OBJ_NEW(opal_thread_t);
77+
t->t_handle = pthread_self();
78+
return t;
79+
}
80+
81+
/*
 * No-op in this implementation: nothing is recorded about the main thread.
 */
void opal_thread_set_main(void)
{
    /* nothing to do */
}

opal/mca/threads/pthreads/threads_pthreads_module.c

Lines changed: 1 addition & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2006 The University of Tennessee and The University
6+
* Copyright (c) 2004-2021 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -32,58 +32,9 @@
3232
#include "opal/util/output.h"
3333
#include "opal/util/sys_limits.h"
3434

35-
/*
36-
* Constructor
37-
*/
38-
static void opal_thread_construct(opal_thread_t *t)
39-
{
40-
t->t_run = 0;
41-
t->t_handle = (pthread_t) -1;
42-
}
43-
44-
/* Class instance for opal_thread_t: derives from opal_object_t, uses
 * opal_thread_construct as constructor and has no destructor (NULL). */
OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL);
45-
46-
/*
 * Start t as a pthread running t->t_run with the thread object itself
 * as argument.
 *
 * In debug builds the call is rejected with OPAL_ERR_BAD_PARAM when no
 * entry function is set or the handle does not carry the "no thread"
 * marker. Returns OPAL_SUCCESS on success and OPAL_ERR_IN_ERRNO if
 * pthread_create() failed.
 */
int opal_thread_start(opal_thread_t *t)
{
    int rc;

    if (OPAL_ENABLE_DEBUG) {
        if (NULL == t->t_run || (pthread_t) -1 != t->t_handle) {
            return OPAL_ERR_BAD_PARAM;
        }
    }

    rc = pthread_create(&t->t_handle, NULL, (void *(*) (void *) ) t->t_run, t);
    if (0 != rc) {
        /* The handle is undefined after a failed pthread_create(); restore
         * the "no thread" marker so the object can be started again. */
        t->t_handle = (pthread_t) -1;
        return OPAL_ERR_IN_ERRNO;
    }

    return OPAL_SUCCESS;
}
60-
61-
/*
 * Join the pthread behind t.
 *
 * thr_return is passed straight to pthread_join() and may be NULL.
 * The handle is reset to the "no thread" marker whether or not the join
 * succeeded. Returns OPAL_SUCCESS on success, OPAL_ERR_IN_ERRNO otherwise.
 */
int opal_thread_join(opal_thread_t *t, void **thr_return)
{
    const int join_rc = pthread_join(t->t_handle, thr_return);

    t->t_handle = (pthread_t) -1;
    if (0 != join_rc) {
        return OPAL_ERR_IN_ERRNO;
    }
    return OPAL_SUCCESS;
}
67-
68-
/*
 * Tell whether the thread object t refers to the calling pthread.
 *
 * pthread_t is an opaque type, so the comparison goes through
 * pthread_equal() rather than the == operator.
 */
bool opal_thread_self_compare(opal_thread_t *t)
{
    return 0 != pthread_equal(pthread_self(), t->t_handle);
}
72-
73-
opal_thread_t *opal_thread_get_self(void)
74-
{
75-
opal_thread_t *t = OBJ_NEW(opal_thread_t);
76-
t->t_handle = pthread_self();
77-
return t;
78-
}
79-
8035
/*
 * Create a thread-specific-data key by forwarding key and destructor to
 * pthread_key_create().
 *
 * Returns OPAL_SUCCESS if the key was created, OPAL_ERR_IN_ERRNO otherwise.
 */
int opal_tsd_key_create(opal_tsd_key_t *key, opal_tsd_destructor_t destructor)
{
    if (0 != pthread_key_create(key, destructor)) {
        return OPAL_ERR_IN_ERRNO;
    }
    return OPAL_SUCCESS;
}
86-
87-
/*
 * No-op in this implementation: nothing is recorded about the main thread.
 */
void opal_thread_set_main(void)
{
    /* nothing to do */
}

opal/mca/threads/pthreads/threads_pthreads_threads.h

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2006 The University of Tennessee and The University
6+
* Copyright (c) 2004-2021 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
@@ -32,13 +32,6 @@
3232
#include "opal/mca/threads/pthreads/threads_pthreads.h"
3333
#include "opal/mca/threads/threads.h"
3434

35-
struct opal_thread_t {
36-
opal_object_t super;
37-
opal_thread_fn_t t_run;
38-
void *t_arg;
39-
pthread_t t_handle;
40-
};
41-
4235
/* Pthreads do not need to yield when idle */
4336
#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT false
4437

opal/mca/threads/qthreads/threads_qthreads_module.c

Lines changed: 1 addition & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2005 The University of Tennessee and The University
6+
* Copyright (c) 2004-2021 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -51,74 +51,6 @@ static inline void self_key_ensure_init(void)
5151
/* opal_thread_self_key has been already initialized. */
5252
}
5353

54-
/*
55-
* Constructor
56-
*/
57-
static void opal_thread_construct(opal_thread_t *t)
58-
{
59-
t->t_run = 0;
60-
t->t_thread_ret = 0;
61-
}
62-
63-
/* Class instance for opal_thread_t: derives from opal_object_t, uses
 * opal_thread_construct as constructor and has no destructor (NULL). */
OBJ_CLASS_INSTANCE(opal_thread_t, opal_object_t, opal_thread_construct, NULL);
64-
65-
static inline aligned_t *opal_thread_get_qthreads_self(void)
66-
{
67-
self_key_ensure_init();
68-
void *ptr = qthread_getspecific(opal_thread_self_key);
69-
return (aligned_t *) ptr;
70-
}
71-
72-
static aligned_t opal_thread_qthreads_wrapper(void *arg)
73-
{
74-
opal_thread_t *t = (opal_thread_t *) arg;
75-
76-
/* Register itself. */
77-
self_key_ensure_init();
78-
qthread_setspecific(opal_thread_self_key, t->t_thread_ret_ptr);
79-
80-
t->t_ret = ((void *(*) (void *) ) t->t_run)(t);
81-
return 0;
82-
}
83-
84-
opal_thread_t *opal_thread_get_self(void)
85-
{
86-
opal_threads_ensure_init_qthreads();
87-
opal_thread_t *t = OBJ_NEW(opal_thread_t);
88-
t->t_thread_ret_ptr = opal_thread_get_qthreads_self();
89-
return t;
90-
}
91-
92-
/*
 * Tell whether the thread object t refers to the calling thread, by
 * comparing the pointer registered for the caller with the address of
 * t's own return word.
 */
bool opal_thread_self_compare(opal_thread_t *t)
{
    aligned_t *current;

    opal_threads_ensure_init_qthreads();
    current = opal_thread_get_qthreads_self();
    return current == &t->t_thread_ret;
}
97-
98-
/*
 * Join the thread behind t: synchronize on its return word with
 * qthread_readFF(), then hand back the value its entry function returned.
 *
 * thr_return may be NULL if the caller is not interested in the value.
 * The return word is cleared afterwards. Always returns OPAL_SUCCESS.
 */
int opal_thread_join(opal_thread_t *t, void **thr_return)
{
    qthread_readFF(NULL, t->t_thread_ret_ptr);

    if (NULL != thr_return) {
        *thr_return = t->t_ret;
    }

    t->t_thread_ret = 0;
    return OPAL_SUCCESS;
}
107-
108-
/*
 * No-op in this implementation: nothing is recorded about the main thread.
 */
void opal_thread_set_main(void)
{
    /* nothing to do */
}
111-
112-
/*
 * Start t as a qthread running opal_thread_qthreads_wrapper, with t's
 * return word as the location Qthreads is given for the result.
 *
 * Returns OPAL_SUCCESS if qthread_fork() succeeded and OPAL_ERROR
 * otherwise, so that callers do not later wait on a thread that was
 * never created.
 */
int opal_thread_start(opal_thread_t *t)
{
    int rc;

    opal_threads_ensure_init_qthreads();

    t->t_thread_ret_ptr = &t->t_thread_ret;
    rc = qthread_fork(opal_thread_qthreads_wrapper, t, &t->t_thread_ret);

    return (QTHREAD_SUCCESS == rc) ? OPAL_SUCCESS : OPAL_ERROR;
}
119-
120-
/* Declaration of the opal_thread_t class object. */
OBJ_CLASS_DECLARATION(opal_thread_t);
121-
12254
int opal_tsd_key_create(opal_tsd_key_t *key, opal_tsd_destructor_t destructor)
12355
{
12456
opal_threads_ensure_init_qthreads();

0 commit comments

Comments
 (0)