Skip to content

Commit 4ef1243

Browse files
authored
Merge pull request #8859 from hppritcha/topic/fix_data_type_issue
v4.0.x: Fix "partial" datatype issue
2 parents b5ad71c + 2fe7de1 commit 4ef1243

14 files changed

+638
-262
lines changed

config/opal_check_cuda.m4

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ AS_IF([test "$opal_check_cuda_happy" = "yes"],
9191
# If we have CUDA support, check to see if we have support for SYNC_MEMOPS
9292
# which was first introduced in CUDA 6.0.
9393
AS_IF([test "$opal_check_cuda_happy"="yes"],
94-
AC_CHECK_DECL([CU_POINTER_ATTRIBUTE_SYNC_MEMOPS], [CUDA_SYNC_MEMOPS=1], [CUDA_SYNC_MEMOPS=0],
95-
[#include <$opal_cuda_incdir/cuda.h>]),
94+
[AC_CHECK_DECL([CU_POINTER_ATTRIBUTE_SYNC_MEMOPS], [CUDA_SYNC_MEMOPS=1], [CUDA_SYNC_MEMOPS=0],
95+
[#include <$opal_cuda_incdir/cuda.h>])],
9696
[])
9797

9898
# If we have CUDA support, check to see if we have CUDA 6.0 or later.

configure.ac

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1369,10 +1369,13 @@ OPAL_SETUP_WRAPPER_FINAL
13691369
# autoconf macro defines in mpi.h. Since AC sometimes changes whether
13701370
# things are defined as null tokens or an integer result, two projects
13711371
# with different versions of AC can cause problems.
1372-
if test $ac_cv_header_stdc = yes; then
1373-
AC_DEFINE(OPAL_STDC_HEADERS, 1,
1374-
[Do not use outside of mpi.h. Define to 1 if you have the ANSI C header files.])
1375-
fi
1372+
1373+
# According to the autoconf 2.67 documentation the AC_HEADER_STDC macro,
1374+
# and therefore the ac_cv_header_stdc cache variable, is obsolescent, as
1375+
# current systems have conforming header files. Instead of removing the
1376+
# protection completely, let's just make sure it is always on.
1377+
AC_DEFINE(OPAL_STDC_HEADERS, 1,
1378+
[Do not use outside of mpi.h. Define to 1 if you have the ANSI C header files.])
13761379
if test $ac_cv_header_sys_time_h = yes ; then
13771380
AC_DEFINE(OPAL_HAVE_SYS_TIME_H, 1,
13781381
[Do not use outside of mpi.h. Define to 1 if you have the <sys/time.h> header file.])

opal/datatype/opal_convertor.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,8 @@ size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor )
483483
pConvertor->remote_size = pConvertor->local_size;
484484
if( OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask) ) {
485485
pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS);
486-
if (!(pConvertor->flags & CONVERTOR_SEND && pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS)) {
486+
/* Can we use the optimized description? */
487+
if (pConvertor->flags & OPAL_DATATYPE_OPTIMIZED_RESTRICTED) {
487488
pConvertor->use_desc = &(datatype->desc);
488489
}
489490
if( 0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE) ) {

opal/datatype/opal_datatype.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,18 @@ BEGIN_C_DECLS
7575
* We should make the difference here between the predefined contiguous and non contiguous
7676
* datatypes. The OPAL_DATATYPE_FLAG_BASIC is held by all predefined contiguous datatypes.
7777
*/
78-
#define OPAL_DATATYPE_FLAG_BASIC (OPAL_DATATYPE_FLAG_PREDEFINED | \
79-
OPAL_DATATYPE_FLAG_CONTIGUOUS | \
80-
OPAL_DATATYPE_FLAG_NO_GAPS | \
81-
OPAL_DATATYPE_FLAG_DATA | \
82-
OPAL_DATATYPE_FLAG_COMMITTED)
78+
#define OPAL_DATATYPE_FLAG_BASIC \
79+
(OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS \
80+
| OPAL_DATATYPE_FLAG_DATA | OPAL_DATATYPE_FLAG_COMMITTED)
81+
/*
82+
* If during the datatype optimization process we collapse contiguous elements with
83+
* different types, we cannot use this optimized description for any communication
84+
* in a heterogeneous setting, especially not for the external32 support.
85+
*
86+
* A datatype with this flag cannot use the optimized description in heterogeneous
87+
* setups.
88+
*/
89+
#define OPAL_DATATYPE_OPTIMIZED_RESTRICTED 0x1000
8390

8491
/**
8592
* The number of supported entries in the data-type definition and the

opal/datatype/opal_datatype_dump.c

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -62,17 +62,39 @@ int opal_datatype_contain_basic_datatypes( const opal_datatype_t* pData, char* p
6262
int opal_datatype_dump_data_flags( unsigned short usflags, char* ptr, size_t length )
6363
{
6464
int index = 0;
65-
if( length < 22 ) return 0;
66-
index = snprintf( ptr, 22, "-----------[---][---]" ); /* set everything to - */
67-
if( usflags & OPAL_DATATYPE_FLAG_COMMITTED ) ptr[1] = 'c';
68-
if( usflags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) ptr[2] = 'C';
69-
if( usflags & OPAL_DATATYPE_FLAG_OVERLAP ) ptr[3] = 'o';
70-
if( usflags & OPAL_DATATYPE_FLAG_USER_LB ) ptr[4] = 'l';
71-
if( usflags & OPAL_DATATYPE_FLAG_USER_UB ) ptr[5] = 'u';
72-
if( usflags & OPAL_DATATYPE_FLAG_PREDEFINED ) ptr[6] = 'P';
73-
if( !(usflags & OPAL_DATATYPE_FLAG_NO_GAPS) ) ptr[7] = 'G';
74-
if( usflags & OPAL_DATATYPE_FLAG_DATA ) ptr[8] = 'D';
75-
if( (usflags & OPAL_DATATYPE_FLAG_BASIC) == OPAL_DATATYPE_FLAG_BASIC ) ptr[9] = 'B';
65+
if (length < 22) {
66+
return 0;
67+
}
68+
index = snprintf(ptr, 22, "-----------[---][---]"); /* set everything to - */
69+
if (usflags & OPAL_DATATYPE_FLAG_COMMITTED) {
70+
ptr[1] = 'c';
71+
}
72+
if (usflags & OPAL_DATATYPE_FLAG_CONTIGUOUS) {
73+
ptr[2] = 'C';
74+
}
75+
if (usflags & OPAL_DATATYPE_FLAG_OVERLAP) {
76+
ptr[3] = 'o';
77+
}
78+
if (usflags & OPAL_DATATYPE_FLAG_USER_LB) {
79+
ptr[4] = 'l';
80+
}
81+
if (usflags & OPAL_DATATYPE_FLAG_USER_UB) {
82+
ptr[5] = 'u';
83+
}
84+
if (usflags & OPAL_DATATYPE_FLAG_PREDEFINED) {
85+
ptr[6] = 'P';
86+
}
87+
if (!(usflags & OPAL_DATATYPE_FLAG_NO_GAPS)) {
88+
ptr[7] = 'G';
89+
}
90+
if (usflags & OPAL_DATATYPE_FLAG_DATA) {
91+
ptr[8] = 'D';
92+
}
93+
if ((usflags & OPAL_DATATYPE_FLAG_BASIC) == OPAL_DATATYPE_FLAG_BASIC) {
94+
ptr[9] = 'B';
95+
} else if (usflags & OPAL_DATATYPE_OPTIMIZED_RESTRICTED) {
96+
ptr[9] = 'H'; /* optimized description restricted to homogeneous cases */
97+
}
7698
/* We know nothing about the upper level language or flags! */
7799
/* ... */
78100
return index;

opal/datatype/opal_datatype_internal.h

Lines changed: 8 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -36,51 +36,16 @@
3636

3737
extern int opal_datatype_dfd;
3838

39-
# define DDT_DUMP_STACK( PSTACK, STACK_POS, PDESC, NAME ) \
40-
opal_datatype_dump_stack( (PSTACK), (STACK_POS), (PDESC), (NAME) )
41-
# if defined(ACCEPT_C99)
42-
# define DUMP( ARGS... ) opal_output(opal_datatype_dfd, __VA_ARGS__)
43-
# else
44-
# if defined(__GNUC__) && !defined(__STDC__)
45-
# define DUMP(ARGS...) opal_output( opal_datatype_dfd, ARGS)
46-
# else
47-
static inline void DUMP( char* fmt, ... )
48-
{
49-
va_list list;
39+
# define DDT_DUMP_STACK(PSTACK, STACK_POS, PDESC, NAME) \
40+
opal_datatype_dump_stack((PSTACK), (STACK_POS), (PDESC), (NAME))
41+
42+
# define DUMP(...) opal_output(opal_datatype_dfd, __VA_ARGS__)
5043

51-
va_start( list, fmt );
52-
opal_output_vverbose( 0, opal_datatype_dfd, fmt, list );
53-
va_end( list );
54-
}
55-
# endif /* __GNUC__ && !__STDC__ */
56-
# endif /* ACCEPT_C99 */
5744
#else
58-
# define DDT_DUMP_STACK( PSTACK, STACK_POS, PDESC, NAME )
59-
# if defined(ACCEPT_C99)
60-
# define DUMP(ARGS...)
61-
# else
62-
# if defined(__GNUC__) && !defined(__STDC__)
63-
# define DUMP(ARGS...)
64-
# else
65-
/* If we do not compile with PGI, mark the parameter as unused */
66-
# if !defined(__PGI)
67-
# define __opal_attribute_unused_tmp__ __opal_attribute_unused__
68-
# else
69-
# define __opal_attribute_unused_tmp__
70-
# endif
71-
static inline void DUMP( char* fmt __opal_attribute_unused_tmp__, ... )
72-
{
73-
#if defined(__PGI)
74-
/* Some compilers complain if we have "..." arguments and no
75-
corresponding va_start() */
76-
va_list arglist;
77-
va_start(arglist, fmt);
78-
va_end(arglist);
79-
#endif
80-
}
81-
# undef __opal_attribute_unused_tmp__
82-
# endif /* __GNUC__ && !__STDC__ */
83-
# endif /* ACCEPT_C99 */
45+
46+
# define DDT_DUMP_STACK(PSTACK, STACK_POS, PDESC, NAME)
47+
# define DUMP(...)
48+
8449
#endif /* VERBOSE */
8550

8651

opal/datatype/opal_datatype_optimize.c

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,12 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
8787
compress.blocklen = pData->desc.desc[pos_desc + index].elem.blocklen;
8888
for( uint32_t i = index+1; i < loop->items; i++ ) {
8989
current = &pData->desc.desc[pos_desc + i].elem;
90-
assert(1 == current->count);
91-
if( (current->common.type == OPAL_DATATYPE_LOOP) ||
92-
compress.common.type != current->common.type ) {
93-
compress.common.type = OPAL_DATATYPE_UINT1;
90+
assert(1 == current->count);
91+
if ((current->common.type == OPAL_DATATYPE_LOOP)
92+
|| compress.common.type != current->common.type) {
93+
compress.common.type = OPAL_DATATYPE_UINT1;
94+
compress.common.flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED;
95+
pData->flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED;
9496
compress.blocklen = end_loop->size;
9597
break;
9698
}
@@ -174,12 +176,14 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
174176
/* are the two elements compatible: aka they have very similar values and they
175177
* can be merged together by increasing the count, and/or changing the extent.
176178
*/
177-
if( (last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) ==
178-
(current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size) ) {
179-
ddt_elem_desc_t save = last; /* safekeep the type and blocklen */
180-
if( last.common.type != current->common.type ) {
181-
last.blocklen *= opal_datatype_basicDatatypes[last.common.type]->size;
182-
last.common.type = OPAL_DATATYPE_UINT1;
179+
if ((last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size)
180+
== (current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size)) {
181+
ddt_elem_desc_t save = last; /* safekeep the type and blocklen */
182+
if (last.common.type != current->common.type) {
183+
last.blocklen *= opal_datatype_basicDatatypes[last.common.type]->size;
184+
last.common.type = OPAL_DATATYPE_UINT1;
185+
last.common.flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED;
186+
pData->flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED;
183187
}
184188

185189
if( (last.extent * (ptrdiff_t)last.count + last.disp) == current->disp ) {
@@ -225,9 +229,14 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
225229
if( last.common.type == current->common.type ) {
226230
last.blocklen += current->blocklen;
227231
} else {
228-
last.blocklen = ((last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) +
229-
(current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size));
230-
last.common.type = OPAL_DATATYPE_UINT1;
232+
last.blocklen = ((last.blocklen
233+
* opal_datatype_basicDatatypes[last.common.type]->size)
234+
+ (current->blocklen
235+
* opal_datatype_basicDatatypes[current->common.type]
236+
->size));
237+
last.common.type = OPAL_DATATYPE_UINT1;
238+
last.common.flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED;
239+
pData->flags |= OPAL_DATATYPE_OPTIMIZED_RESTRICTED;
231240
}
232241
last.extent += current->extent;
233242
if( current->count != 1 ) {

opal/datatype/opal_datatype_pack.c

Lines changed: 74 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -383,42 +383,83 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor,
383383
*/
384384

385385
static inline void
386-
pack_predefined_heterogeneous( opal_convertor_t* CONVERTOR,
387-
const dt_elem_desc_t* ELEM,
388-
size_t* COUNT,
389-
unsigned char** SOURCE,
390-
unsigned char** DESTINATION,
391-
size_t* SPACE )
386+
pack_predefined_heterogeneous(opal_convertor_t *CONVERTOR,
387+
const dt_elem_desc_t *ELEM, size_t *COUNT,
388+
unsigned char **memory,
389+
unsigned char **packed, size_t *SPACE)
392390
{
393-
const opal_convertor_master_t* master = (CONVERTOR)->master;
394-
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
395-
unsigned char* _source = (*SOURCE) + _elem->disp;
396-
ptrdiff_t advance;
397-
size_t _count = *(COUNT);
398-
size_t _r_blength;
399-
400-
_r_blength = master->remote_sizes[_elem->common.type];
401-
if( (_count * _r_blength) > *(SPACE) ) {
402-
_count = (*(SPACE) / _r_blength);
403-
if( 0 == _count ) return; /* nothing to do */
391+
const opal_convertor_master_t *master = (CONVERTOR)->master;
392+
const ddt_elem_desc_t *_elem = &((ELEM)->elem);
393+
size_t cando_count = *(COUNT), do_now_bytes;
394+
size_t local_elem_size = opal_datatype_basicDatatypes[_elem->common.type]->size;
395+
size_t remote_elem_size = master->remote_sizes[_elem->common.type];
396+
size_t blocklen_bytes = remote_elem_size;
397+
unsigned char *_memory = (*memory) + _elem->disp;
398+
unsigned char *_packed = *packed;
399+
ptrdiff_t advance = 0;
400+
401+
assert(0 == (cando_count % _elem->blocklen)); /* no partials here */
402+
assert(*(COUNT) <= ((size_t) _elem->count * _elem->blocklen));
403+
404+
if ((remote_elem_size * cando_count) > *(SPACE))
405+
cando_count = (*SPACE) / blocklen_bytes;
406+
407+
/* preemptively update the COUNT value we will return. */
408+
*(COUNT) -= cando_count;
409+
410+
if (_elem->blocklen == 1) {
411+
master->pFunctions[_elem->common.type](CONVERTOR, cando_count,
412+
_memory, *SPACE, _elem->extent,
413+
_packed, *SPACE, remote_elem_size,
414+
&advance);
415+
_memory += cando_count * _elem->extent;
416+
_packed += cando_count * remote_elem_size;
417+
goto update_and_return;
404418
}
405419

406-
OPAL_DATATYPE_SAFEGUARD_POINTER( _source, (_count * _elem->extent), (CONVERTOR)->pBaseBuf,
407-
(CONVERTOR)->pDesc, (CONVERTOR)->count );
408-
DO_DEBUG( opal_output( 0, "pack [l %s r %s] memcpy( %p, %p, %lu ) => space %lu\n",
409-
((ptrdiff_t)(opal_datatype_basicDatatypes[_elem->common.type]->size) == _elem->extent) ? "cont" : "----",
410-
((ptrdiff_t)_r_blength == _elem->extent) ? "cont" : "----",
411-
(void*)*(DESTINATION), (void*)_source, (unsigned long)_r_blength,
412-
(unsigned long)(*(SPACE)) ); );
413-
master->pFunctions[_elem->common.type]( CONVERTOR, _count,
414-
_source, *SPACE, _elem->extent,
415-
*DESTINATION, *SPACE, _r_blength,
416-
&advance );
417-
_r_blength *= _count; /* update the remote length to encompass all the elements */
418-
*(SOURCE) += _count * _elem->extent;
419-
*(DESTINATION) += _r_blength;
420-
*(SPACE) -= _r_blength;
421-
*(COUNT) -= _count;
420+
if ((1 < _elem->count) && (_elem->blocklen <= cando_count)) {
421+
blocklen_bytes = remote_elem_size * _elem->blocklen;
422+
423+
do { /* Do as many full blocklen as possible */
424+
OPAL_DATATYPE_SAFEGUARD_POINTER(_memory, blocklen_bytes, (CONVERTOR)->pBaseBuf,
425+
(CONVERTOR)->pDesc, (CONVERTOR)->count);
426+
DO_DEBUG(opal_output(0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n",
427+
(void *) _packed, (void *) _memory, (unsigned long) blocklen_bytes,
428+
(unsigned long) (*(SPACE) - (_packed - *(packed)))););
429+
master->pFunctions[_elem->common.type](CONVERTOR, _elem->blocklen,
430+
_memory, *SPACE, local_elem_size,
431+
_packed, *SPACE, remote_elem_size,
432+
&advance);
433+
_packed += blocklen_bytes;
434+
_memory += _elem->extent;
435+
cando_count -= _elem->blocklen;
436+
} while (_elem->blocklen <= cando_count);
437+
}
438+
439+
/**
440+
* As an epilog do anything left from the last blocklen.
441+
*/
442+
if (0 != cando_count) {
443+
assert((cando_count < _elem->blocklen)
444+
|| ((1 == _elem->count) && (cando_count <= _elem->blocklen)));
445+
do_now_bytes = cando_count * remote_elem_size;
446+
OPAL_DATATYPE_SAFEGUARD_POINTER(_memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
447+
(CONVERTOR)->pDesc, (CONVERTOR)->count);
448+
DO_DEBUG(opal_output(0, "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n",
449+
(void *) _packed, (void *) _memory, (unsigned long) do_now_bytes,
450+
(unsigned long) (*(SPACE) - (_packed - *(packed)))););
451+
master->pFunctions[_elem->common.type](CONVERTOR, cando_count,
452+
_memory, *SPACE, local_elem_size,
453+
_packed, *SPACE, remote_elem_size,
454+
&advance);
455+
_memory += do_now_bytes;
456+
_packed += do_now_bytes;
457+
}
458+
459+
update_and_return:
460+
*(memory) = _memory - _elem->disp;
461+
*(SPACE) -= (_packed - *packed);
462+
*(packed) = _packed;
422463
}
423464

424465
int32_t

0 commit comments

Comments
 (0)