@@ -383,42 +383,83 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor,
383383 */
384384
385385static inline void
386- pack_predefined_heterogeneous ( opal_convertor_t * CONVERTOR ,
387- const dt_elem_desc_t * ELEM ,
388- size_t * COUNT ,
389- unsigned char * * SOURCE ,
390- unsigned char * * DESTINATION ,
391- size_t * SPACE )
386+ pack_predefined_heterogeneous (opal_convertor_t * CONVERTOR ,
387+ const dt_elem_desc_t * ELEM , size_t * COUNT ,
388+ unsigned char * * memory ,
389+ unsigned char * * packed , size_t * SPACE )
392390{
393- const opal_convertor_master_t * master = (CONVERTOR )-> master ;
394- const ddt_elem_desc_t * _elem = & ((ELEM )-> elem );
395- unsigned char * _source = (* SOURCE ) + _elem -> disp ;
396- ptrdiff_t advance ;
397- size_t _count = * (COUNT );
398- size_t _r_blength ;
399-
400- _r_blength = master -> remote_sizes [_elem -> common .type ];
401- if ( (_count * _r_blength ) > * (SPACE ) ) {
402- _count = (* (SPACE ) / _r_blength );
403- if ( 0 == _count ) return ; /* nothing to do */
391+ const opal_convertor_master_t * master = (CONVERTOR )-> master ;
392+ const ddt_elem_desc_t * _elem = & ((ELEM )-> elem );
393+ size_t cando_count = * (COUNT ), do_now_bytes ;
394+ size_t local_elem_size = opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
395+ size_t remote_elem_size = master -> remote_sizes [_elem -> common .type ];
396+ size_t blocklen_bytes = remote_elem_size ;
397+ unsigned char * _memory = (* memory ) + _elem -> disp ;
398+ unsigned char * _packed = * packed ;
399+ ptrdiff_t advance = 0 ;
400+
401+ assert (0 == (cando_count % _elem -> blocklen )); /* no partials here */
402+ assert (* (COUNT ) <= ((size_t ) _elem -> count * _elem -> blocklen ));
403+
404+ if ((remote_elem_size * cando_count ) > * (SPACE ))
405+ cando_count = (* SPACE ) / blocklen_bytes ;
406+
407+ /* premptively update the number of COUNT we will return. */
408+ * (COUNT ) -= cando_count ;
409+
410+ if (_elem -> blocklen == 1 ) {
411+ master -> pFunctions [_elem -> common .type ](CONVERTOR , cando_count ,
412+ _memory , * SPACE , _elem -> extent ,
413+ _packed , * SPACE , remote_elem_size ,
414+ & advance );
415+ _memory += cando_count * _elem -> extent ;
416+ _packed += cando_count * remote_elem_size ;
417+ goto update_and_return ;
404418 }
405419
406- OPAL_DATATYPE_SAFEGUARD_POINTER ( _source , (_count * _elem -> extent ), (CONVERTOR )-> pBaseBuf ,
407- (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
408- DO_DEBUG ( opal_output ( 0 , "pack [l %s r %s] memcpy( %p, %p, %lu ) => space %lu\n" ,
409- ((ptrdiff_t )(opal_datatype_basicDatatypes [_elem -> common .type ]-> size ) == _elem -> extent ) ? "cont" : "----" ,
410- ((ptrdiff_t )_r_blength == _elem -> extent ) ? "cont" : "----" ,
411- (void * )* (DESTINATION ), (void * )_source , (unsigned long )_r_blength ,
412- (unsigned long )(* (SPACE )) ); );
413- master -> pFunctions [_elem -> common .type ]( CONVERTOR , _count ,
414- _source , * SPACE , _elem -> extent ,
415- * DESTINATION , * SPACE , _r_blength ,
416- & advance );
417- _r_blength *= _count ; /* update the remote length to encompass all the elements */
418- * (SOURCE ) += _count * _elem -> extent ;
419- * (DESTINATION ) += _r_blength ;
420- * (SPACE ) -= _r_blength ;
421- * (COUNT ) -= _count ;
420+ if ((1 < _elem -> count ) && (_elem -> blocklen <= cando_count )) {
421+ blocklen_bytes = remote_elem_size * _elem -> blocklen ;
422+
423+ do { /* Do as many full blocklen as possible */
424+ OPAL_DATATYPE_SAFEGUARD_POINTER (_memory , blocklen_bytes , (CONVERTOR )-> pBaseBuf ,
425+ (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
426+ DO_DEBUG (opal_output (0 , "pack 2. memcpy( %p, %p, %lu ) => space %lu\n" ,
427+ (void * ) _packed , (void * ) _memory , (unsigned long ) blocklen_bytes ,
428+ (unsigned long ) (* (SPACE ) - (_packed - * (packed )))););
429+ master -> pFunctions [_elem -> common .type ](CONVERTOR , _elem -> blocklen ,
430+ _memory , * SPACE , local_elem_size ,
431+ _packed , * SPACE , remote_elem_size ,
432+ & advance );
433+ _packed += blocklen_bytes ;
434+ _memory += _elem -> extent ;
435+ cando_count -= _elem -> blocklen ;
436+ } while (_elem -> blocklen <= cando_count );
437+ }
438+
439+ /**
440+ * As an epilog do anything left from the last blocklen.
441+ */
442+ if (0 != cando_count ) {
443+ assert ((cando_count < _elem -> blocklen )
444+ || ((1 == _elem -> count ) && (cando_count <= _elem -> blocklen )));
445+ do_now_bytes = cando_count * remote_elem_size ;
446+ OPAL_DATATYPE_SAFEGUARD_POINTER (_memory , do_now_bytes , (CONVERTOR )-> pBaseBuf ,
447+ (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
448+ DO_DEBUG (opal_output (0 , "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n" ,
449+ (void * ) _packed , (void * ) _memory , (unsigned long ) do_now_bytes ,
450+ (unsigned long ) (* (SPACE ) - (_packed - * (packed )))););
451+ master -> pFunctions [_elem -> common .type ](CONVERTOR , cando_count ,
452+ _memory , * SPACE , local_elem_size ,
453+ _packed , * SPACE , remote_elem_size ,
454+ & advance );
455+ _memory += do_now_bytes ;
456+ _packed += do_now_bytes ;
457+ }
458+
459+ update_and_return :
460+ * (memory ) = _memory - _elem -> disp ;
461+ * (SPACE ) -= (_packed - * packed );
462+ * (packed ) = _packed ;
422463}
423464
424465int32_t
0 commit comments