Skip to content

Commit 2d68804

Browse files
committed
communicator: make c_name a dynamic array and reorder struct
Make the c_name element of the communicator structure a dynamically allocated element. This allows us to reduce the size of PREDEFINED_COMMUNICATOR_PAD back to 512 to maintain backwards compatibility with the ompi 4.1.x release series. Reorder the communicator fields to reduce the struct size. This brings the communicator size to 536 bytes with FT and PERUSE enabled and compiled in debug mode. Fixes issue #11373. Signed-off-by: Edgar Gabriel <edgar.gabriel@amd.com> Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
1 parent e5abeb8 commit 2d68804

File tree

8 files changed

+53
-58
lines changed

8 files changed

+53
-58
lines changed

ompi/communicator/comm.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
2727
* Copyright (c) 2018-2022 Triad National Security, LLC. All rights
2828
* reserved.
29+
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
2930
* $COPYRIGHT$
3031
*
3132
* Additional copyrights may follow
@@ -215,6 +216,11 @@ int ompi_comm_set_nb (ompi_communicator_t **ncomm, ompi_communicator_t *oldcomm,
215216
if (NULL == newcomm) {
216217
return OMPI_ERR_OUT_OF_RESOURCE;
217218
}
219+
newcomm->c_name = (char*) malloc (OPAL_MAX_OBJECT_NAME);
220+
if (NULL == newcomm->c_name) {
221+
return OMPI_ERR_OUT_OF_RESOURCE;
222+
}
223+
newcomm->c_name[0] = '\0';
218224
newcomm->super.s_info = NULL;
219225
/* fill in the inscribing hyper-cube dimensions */
220226
newcomm->c_cube_dim = opal_cube_dim(local_size);

ompi/communicator/comm_init.c

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2020 The University of Tennessee and The University
6+
* Copyright (c) 2004-2023 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -25,6 +25,7 @@
2525
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
2626
* Copyright (c) 2018-2022 Triad National Security, LLC. All rights
2727
* reserved.
28+
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
2829
* $COPYRIGHT$
2930
*
3031
* Additional copyrights may follow
@@ -169,8 +170,7 @@ int ompi_comm_init(void)
169170
(void)opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, 0, &ompi_mpi_comm_null);
170171
(void)opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, 1, &ompi_mpi_comm_null);
171172

172-
opal_string_copy(ompi_mpi_comm_null.comm.c_name, "MPI_COMM_NULL",
173-
sizeof(ompi_mpi_comm_null.comm.c_name));
173+
ompi_mpi_comm_null.comm.c_name = strdup ("MPI_COMM_NULL");
174174
ompi_mpi_comm_null.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC |
175175
OMPI_COMM_GLOBAL_INDEX;
176176

@@ -221,8 +221,7 @@ int ompi_comm_init_mpi3 (void)
221221
OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_world.comm);
222222
opal_pointer_array_set_item (&ompi_mpi_communicators, 0, &ompi_mpi_comm_world);
223223

224-
opal_string_copy(ompi_mpi_comm_world.comm.c_name, "MPI_COMM_WORLD",
225-
sizeof(ompi_mpi_comm_world.comm.c_name));
224+
ompi_mpi_comm_world.comm.c_name = strdup("MPI_COMM_WORLD");
226225
ompi_mpi_comm_world.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC |
227226
OMPI_COMM_GLOBAL_INDEX;
228227
ompi_mpi_comm_world.comm.instance = group->grp_instance;
@@ -280,8 +279,7 @@ int ompi_comm_init_mpi3 (void)
280279
OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_self.comm);
281280
opal_pointer_array_set_item (&ompi_mpi_communicators, 1, &ompi_mpi_comm_self);
282281

283-
opal_string_copy(ompi_mpi_comm_self.comm.c_name, "MPI_COMM_SELF",
284-
sizeof(ompi_mpi_comm_self.comm.c_name));
282+
ompi_mpi_comm_self.comm.c_name = strdup("MPI_COMM_SELF");
285283
ompi_mpi_comm_self.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC |
286284
OMPI_COMM_GLOBAL_INDEX;
287285
ompi_mpi_comm_self.comm.instance = group->grp_instance;
@@ -412,7 +410,7 @@ static int ompi_comm_finalize (void)
412410
static void ompi_comm_construct(ompi_communicator_t* comm)
413411
{
414412
int idx;
415-
comm->c_name[0] = '\0';
413+
comm->c_name = NULL;
416414
comm->c_index = MPI_UNDEFINED;
417415
comm->c_flags = 0;
418416
comm->c_my_rank = 0;
@@ -444,7 +442,7 @@ static void ompi_comm_construct(ompi_communicator_t* comm)
444442
this communicator */
445443
comm->c_keyhash = NULL;
446444

447-
comm->errhandler_type = OMPI_ERRHANDLER_TYPE_COMM;
445+
comm->error_handler = &ompi_mpi_errors_are_fatal.eh;
448446
#ifdef OMPI_WANT_PERUSE
449447
comm->c_peruse_handles = NULL;
450448
#endif
@@ -520,6 +518,11 @@ static void ompi_comm_destruct(ompi_communicator_t* comm)
520518
comm->error_handler = NULL;
521519
}
522520

521+
if (NULL != comm->c_name) {
522+
free (comm->c_name);
523+
comm->c_name = NULL;
524+
}
525+
523526
#if OPAL_ENABLE_FT_MPI
524527
if( NULL != comm->agreement_specific ) {
525528
OBJ_RELEASE( comm->agreement_specific );

ompi/communicator/communicator.h

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2020 The University of Tennessee and The University
6+
* Copyright (c) 2004-2023 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -24,6 +24,7 @@
2424
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
2525
* Copyright (c) 2018-2022 Triad National Security, LLC. All rights
2626
* reserved.
27+
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
2728
* $COPYRIGHT$
2829
*
2930
* Additional copyrights may follow
@@ -261,20 +262,30 @@ struct ompi_communicator_t {
261262
opal_infosubscriber_t super;
262263
opal_mutex_t c_lock; /* mutex for name and potentially
263264
attributes */
264-
char c_name[MPI_MAX_OBJECT_NAME];
265+
char *c_name;
265266
ompi_comm_extended_cid_t c_contextid;
266267
ompi_comm_extended_cid_block_t c_contextidb;
267268
uint32_t c_index;
268269
int c_my_rank;
269270
uint32_t c_flags; /* flags, e.g. intercomm,
270271
topology, etc. */
271272
uint32_t c_assertions; /* info assertions */
272-
int c_id_available; /* the currently available Cid for allocation
273-
to a child*/
274-
int c_id_start_index; /* the starting index of the block of cids
275-
allocated to this communicator*/
273+
#if OPAL_ENABLE_FT_MPI
276274
uint32_t c_epoch; /* Identifier used to differentiate between two communicators
277275
using the same c_contextid (not at the same time, obviously) */
276+
#endif
277+
/* Non-blocking collective tag. These tags might be shared between
278+
* all non-blocking collective modules (to avoid message collision
279+
* between them in the case where multiple outstanding non-blocking
280+
* collective coexists using multiple backends).
281+
*/
282+
opal_atomic_int32_t c_nbc_tag;
283+
284+
/**< inscribing cube dimension */
285+
int c_cube_dim;
286+
287+
/* index in Fortran <-> C translation array */
288+
int c_f_to_c_index;
278289

279290
ompi_group_t *c_local_group;
280291
ompi_group_t *c_remote_group;
@@ -287,16 +298,10 @@ struct ompi_communicator_t {
287298
/* Attributes */
288299
struct opal_hash_table_t *c_keyhash;
289300

290-
/**< inscribing cube dimension */
291-
int c_cube_dim;
292-
293301
/* Standard information about the selected topology module (or NULL
294302
if this is not a cart, graph or dist graph communicator) */
295303
struct mca_topo_base_module_t* c_topo;
296304

297-
/* index in Fortran <-> C translation array */
298-
int c_f_to_c_index;
299-
300305
#ifdef OMPI_WANT_PERUSE
301306
/*
302307
* Place holder for the PERUSE events.
@@ -307,9 +312,7 @@ struct ompi_communicator_t {
307312
/* Error handling. This field does not have the "c_" prefix so
308313
that the OMPI_ERRHDL_* macros can find it, regardless of whether
309314
it's a comm, window, or file. */
310-
311315
ompi_errhandler_t *error_handler;
312-
ompi_errhandler_type_t errhandler_type;
313316

314317
/* Hooks for PML to hang things */
315318
struct mca_pml_comm_t *c_pml_comm;
@@ -320,21 +323,14 @@ struct ompi_communicator_t {
320323
/* Collectives module interface and data */
321324
mca_coll_base_comm_coll_t *c_coll;
322325

323-
/* Non-blocking collective tag. These tags might be shared between
324-
* all non-blocking collective modules (to avoid message collision
325-
* between them in the case where multiple outstanding non-blocking
326-
* collective coexists using multiple backends).
327-
*/
328-
opal_atomic_int32_t c_nbc_tag;
329-
330326
/* instance that this comm belongs to */
331327
ompi_instance_t* instance;
332328

333329
#if OPAL_ENABLE_FT_MPI
334-
/** MPI_ANY_SOURCE Failed Group Offset - OMPI_Comm_failure_get_acked */
335-
int any_source_offset;
336330
/** agreement caching info for topology and previous returned decisions */
337331
opal_object_t *agreement_specific;
332+
/** MPI_ANY_SOURCE Failed Group Offset - OMPI_Comm_failure_get_acked */
333+
int any_source_offset;
338334
/** Are MPI_ANY_SOURCE operations enabled? - OMPI_Comm_failure_ack */
339335
bool any_source_enabled;
340336
/** Has this communicator been revoked - OMPI_Comm_revoke() */
@@ -437,7 +433,7 @@ typedef struct ompi_communicator_t ompi_communicator_t;
437433
* the PREDEFINED_COMMUNICATOR_PAD macro?
438434
* A: Most likely not, but it would be good to check.
439435
*/
440-
#define PREDEFINED_COMMUNICATOR_PAD 1024
436+
#define PREDEFINED_COMMUNICATOR_PAD 512
441437

442438
struct ompi_predefined_communicator_t {
443439
struct ompi_communicator_t comm;

ompi/debuggers/predefined_gap_test.c

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* Copyright (c) 2009 Sun Microsystems, Inc All rights reserved.
33
* Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved.
4-
* Copyright (c) 2012-2013 The University of Tennessee and The University
4+
* Copyright (c) 2012-2023 The University of Tennessee and The University
55
* of Tennessee Research Foundation. All rights
66
* reserved.
77
* Copyright (c) 2012-2013 Inria. All rights reserved.
@@ -59,8 +59,6 @@ int main(int argc, char **argv) {
5959
GAP_CHECK("c_contextid", test_comm, c_contextid, c_name, 1);
6060
GAP_CHECK("c_my_rank", test_comm, c_my_rank, c_contextid, 1);
6161
GAP_CHECK("c_flags", test_comm, c_flags, c_my_rank, 1);
62-
GAP_CHECK("c_id_available", test_comm, c_id_available, c_flags, 1);
63-
GAP_CHECK("c_id_start_index", test_comm, c_id_start_index, c_id_available, 1);
6462
GAP_CHECK("c_remote_group", test_comm, c_remote_group, c_local_group, 1);
6563
GAP_CHECK("c_local_comm", test_comm, c_local_comm, c_remote_group, 1);
6664
GAP_CHECK("c_keyhash", test_comm, c_keyhash, c_local_comm, 1);
@@ -73,8 +71,6 @@ int main(int argc, char **argv) {
7371
#else
7472
GAP_CHECK("error_handler", test_comm, error_handler, c_f_to_c_index, 1);
7573
#endif
76-
GAP_CHECK("errhandler_type", test_comm, errhandler_type, error_handler, 1);
77-
GAP_CHECK("c_pml_comm", test_comm, c_pml_comm, errhandler_type, 1);
7874
GAP_CHECK("c_coll", test_comm, c_coll, c_pml_comm, 1);
7975

8076
/* Test Predefined group sizes */
@@ -129,8 +125,6 @@ int main(int argc, char **argv) {
129125
GAP_CHECK("w_keyhash", test_win, w_keyhash, w_flags, 1);
130126
GAP_CHECK("w_f_to_c_index", test_win, w_f_to_c_index, w_keyhash, 1);
131127
GAP_CHECK("error_handler", test_win, error_handler, w_f_to_c_index, 1);
132-
GAP_CHECK("errhandler_type", test_win, errhandler_type, error_handler, 1);
133-
GAP_CHECK("w_osc_module", test_win, w_osc_module, errhandler_type, 1);
134128

135129
/* Test Predefined info sizes */
136130
printf("=============================================\n");
@@ -151,8 +145,6 @@ int main(int argc, char **argv) {
151145
GAP_CHECK("f_flags", test_file, f_flags, f_amode, 1);
152146
GAP_CHECK("f_f_to_c_index", test_file, f_f_to_c_index, f_flags, 1);
153147
GAP_CHECK("error_handler", test_file, error_handler, f_f_to_c_index, 1);
154-
GAP_CHECK("errhandler_type", test_file, errhandler_type, error_handler, 1);
155-
GAP_CHECK("f_io_version", test_file, f_io_version, errhandler_type, 1);
156148
GAP_CHECK("f_io_selected_component", test_file, f_io_selected_component, f_io_version, 1);
157149
GAP_CHECK("f_io_selected_module", test_file, f_io_selected_module, f_io_selected_component, 1);
158150
GAP_CHECK("f_io_selected_data", test_file, f_io_selected_data, f_io_selected_module, 1);

ompi/errhandler/errhandler.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2022 The University of Tennessee and The University
6+
* Copyright (c) 2004-2023 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -388,10 +388,10 @@ int ompi_errhandler_proc_failed_internal(ompi_proc_t* ompi_proc, int status, boo
388388
OMPI_NAME_PRINT(&ompi_proc->super.proc_name),
389389
ompi_comm_print_cid(comm),
390390
proc_rank,
391-
(OMPI_ERRHANDLER_TYPE_PREDEFINED == comm->errhandler_type ? "P" :
392-
(OMPI_ERRHANDLER_TYPE_COMM == comm->errhandler_type ? "C" :
393-
(OMPI_ERRHANDLER_TYPE_WIN == comm->errhandler_type ? "W" :
394-
(OMPI_ERRHANDLER_TYPE_FILE == comm->errhandler_type ? "F" : "U") ) ) )
391+
(OMPI_ERRHANDLER_TYPE_PREDEFINED == comm->error_handler->eh_mpi_object_type ? "P" :
392+
(OMPI_ERRHANDLER_TYPE_COMM == comm->error_handler->eh_mpi_object_type ? "C" :
393+
(OMPI_ERRHANDLER_TYPE_WIN == comm->error_handler->eh_mpi_object_type ? "W" :
394+
(OMPI_ERRHANDLER_TYPE_FILE == comm->error_handler->eh_mpi_object_type ? "F" : "U") ) ) )
395395
));
396396
}
397397

ompi/errhandler/errhandler.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2022 The University of Tennessee and The University
6+
* Copyright (c) 2004-2023 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -238,7 +238,7 @@ extern opal_atomic_int32_t ompi_instance_count;
238238
#define OMPI_ERRHANDLER_INVOKE(mpi_object, err_code, message) \
239239
ompi_errhandler_invoke((mpi_object)->error_handler, \
240240
(mpi_object), \
241-
(int)(mpi_object)->errhandler_type, \
241+
(int)(mpi_object)->error_handler->eh_mpi_object_type, \
242242
ompi_errcode_get_mpi_code(err_code), \
243243
(message));
244244

@@ -269,7 +269,7 @@ extern opal_atomic_int32_t ompi_instance_count;
269269
int __mpi_err_code = ompi_errcode_get_mpi_code(err_code); \
270270
ompi_errhandler_invoke((mpi_object)->error_handler, \
271271
(mpi_object), \
272-
(int) (mpi_object)->errhandler_type, \
272+
(int) (mpi_object)->error_handler->eh_mpi_object_type, \
273273
(__mpi_err_code), \
274274
(message)); \
275275
return (__mpi_err_code); \
@@ -307,7 +307,7 @@ extern opal_atomic_int32_t ompi_instance_count;
307307
int __mpi_err_code = ompi_errcode_get_mpi_code(err_code); \
308308
ompi_errhandler_invoke((mpi_object)->error_handler, \
309309
(mpi_object), \
310-
(int)(mpi_object)->errhandler_type, \
310+
(int)(mpi_object)->error_handler->eh_mpi_object_type, \
311311
(__mpi_err_code), \
312312
(message)); \
313313
return (__mpi_err_code); \

ompi/errhandler/errhandler_invoke.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2020 The University of Tennessee and The University
6+
* Copyright (c) 2004-2023 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -183,19 +183,19 @@ int ompi_errhandler_request_invoke(int count,
183183
case OMPI_REQUEST_COLL:
184184
return ompi_errhandler_invoke(mpi_object.comm->error_handler,
185185
mpi_object.comm,
186-
mpi_object.comm->errhandler_type,
186+
mpi_object.comm->error_handler->eh_mpi_object_type,
187187
ec, message);
188188
break;
189189
case OMPI_REQUEST_IO:
190190
return ompi_errhandler_invoke(mpi_object.file->error_handler,
191191
mpi_object.file,
192-
mpi_object.file->errhandler_type,
192+
mpi_object.file->error_handler->eh_mpi_object_type,
193193
ec, message);
194194
break;
195195
case OMPI_REQUEST_WIN:
196196
return ompi_errhandler_invoke(mpi_object.win->error_handler,
197197
mpi_object.win,
198-
mpi_object.win->errhandler_type,
198+
mpi_object.win->error_handler->eh_mpi_object_type,
199199
ec, message);
200200
break;
201201
default:

ompi/include/ompi/memchecker.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
33
* University of Stuttgart. All rights reserved.
4-
* Copyright (c) 2010-2017 The University of Tennessee and The University
4+
* Copyright (c) 2010-2023 The University of Tennessee and The University
55
* of Tennessee Research Foundation. All rights
66
* reserved.
77
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
@@ -220,8 +220,6 @@ static inline int memchecker_comm(MPI_Comm comm)
220220
opal_memchecker_base_isdefined (&comm->c_name, MPI_MAX_OBJECT_NAME);
221221
opal_memchecker_base_isdefined (&comm->c_my_rank, sizeof(int));
222222
opal_memchecker_base_isdefined (&comm->c_flags, sizeof(uint32_t));
223-
opal_memchecker_base_isdefined (&comm->c_id_available, sizeof(int));
224-
opal_memchecker_base_isdefined (&comm->c_id_start_index, sizeof(int));
225223
opal_memchecker_base_isdefined (&comm->c_local_group, sizeof(ompi_group_t *));
226224
opal_memchecker_base_isdefined (&comm->c_remote_group, sizeof(ompi_group_t *));
227225
opal_memchecker_base_isdefined (&comm->c_keyhash, sizeof(struct opal_hash_table_t *));

0 commit comments

Comments
 (0)