@@ -66,12 +66,14 @@ ompi_coll_base_scatter_intra_binomial(
6666 int root , struct ompi_communicator_t * comm ,
6767 mca_coll_base_module_t * module )
6868{
69- int line = -1 , rank , vrank , size , err ;
70- char * ptmp , * tempbuf = NULL ;
71- MPI_Status status ;
7269 mca_coll_base_module_t * base_module = (mca_coll_base_module_t * )module ;
7370 mca_coll_base_comm_t * data = base_module -> base_data ;
74- ptrdiff_t sextent , rextent , ssize , rsize , sgap = 0 , rgap = 0 ;
71+ int line = -1 , rank , vrank , size , err , packed_size , curr_count ;
72+ char * ptmp , * tempbuf = NULL ;
73+ size_t max_data , packed_sizet ;
74+ opal_convertor_t convertor ;
75+ ptrdiff_t sextent ;
76+ MPI_Status status ;
7577
7678 size = ompi_comm_size (comm );
7779 rank = ompi_comm_rank (comm );
@@ -89,99 +91,95 @@ ompi_coll_base_scatter_intra_binomial(
8991 vrank = (rank - root + size ) % size ;
9092 ptmp = (char * )rbuf ; /* by default suppose leaf nodes, just use rbuf */
9193
92- if (rank == root ) {
94+ if ( vrank % 2 ) { /* leaves */
95+ /* recv from parent on leaf nodes */
96+ err = MCA_PML_CALL (recv (rbuf , rcount , rdtype , bmtree -> tree_prev ,
97+ MCA_COLL_BASE_TAG_SCATTER , comm , & status ));
98+ if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
99+ return MPI_SUCCESS ;
100+
101+ }
102+ OBJ_CONSTRUCT ( & convertor , opal_convertor_t );
103+ if (rank == root ) { /* root and non-leafs */
93104 ompi_datatype_type_extent (sdtype , & sextent );
94- ssize = opal_datatype_span (& sdtype -> super , (int64_t )scount * size , & sgap );
95- if (0 == root ) {
96- /* root on 0, just use the send buffer */
97- ptmp = (char * )sbuf ;
98- if (rbuf != MPI_IN_PLACE ) {
99- /* local copy to rbuf */
100- err = ompi_datatype_sndrcv (sbuf , scount , sdtype ,
101- rbuf , rcount , rdtype );
102- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
103- }
104- } else {
105- /* root is not on 0, allocate temp buffer for send */
106- tempbuf = (char * )malloc (ssize );
105+ ptmp = (char * )sbuf ; /* if root == 0, just use the send buffer */
106+ if (0 != root ) {
107+ opal_convertor_copy_and_prepare_for_send ( ompi_mpi_local_convertor , & (sdtype -> super ),
108+ scount * size , sbuf , 0 , & convertor );
109+ opal_convertor_get_packed_size ( & convertor , & packed_sizet );
110+ packed_size = (int )packed_sizet ;
111+ packed_sizet = packed_sizet / size ;
112+ ptmp = tempbuf = (char * )malloc (packed_size );
107113 if (NULL == tempbuf ) {
108114 err = OMPI_ERR_OUT_OF_RESOURCE ; line = __LINE__ ; goto err_hndl ;
109115 }
110- ptmp = tempbuf - sgap ;
111-
112- /* and rotate data so they will eventually in the right place */
113- err = ompi_datatype_copy_content_same_ddt (sdtype , (ptrdiff_t )scount * (ptrdiff_t )(size - root ),
114- ptmp , (char * ) sbuf + sextent * (ptrdiff_t )root * (ptrdiff_t )scount );
115- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
116-
117- err = ompi_datatype_copy_content_same_ddt (sdtype , (ptrdiff_t )scount * (ptrdiff_t )root ,
118- ptmp + sextent * (ptrdiff_t )scount * (ptrdiff_t )(size - root ), (char * )sbuf );
119- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
120-
121- if (rbuf != MPI_IN_PLACE ) {
122- /* local copy to rbuf */
123- err = ompi_datatype_sndrcv (ptmp , scount , sdtype ,
124- rbuf , rcount , rdtype );
125- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
126- }
116+ /* rotate data so they will eventually be in the right place */
117+ struct iovec iov [1 ];
118+ uint32_t iov_size = 1 ;
119+
120+ iov [0 ].iov_base = ptmp + (ptrdiff_t )(size - root ) * packed_sizet ;
121+ iov [0 ].iov_len = max_data = packed_sizet * (ptrdiff_t )root ;
122+ opal_convertor_pack (& convertor , iov , & iov_size , & max_data );
123+
124+ iov [0 ].iov_base = ptmp ;
125+ iov [0 ].iov_len = max_data = packed_sizet * (ptrdiff_t )(size - root );
126+ opal_convertor_pack (& convertor , iov , & iov_size , & max_data );
127+ OBJ_DESTRUCT (& convertor );
128+
129+ sdtype = MPI_PACKED ;
130+ sextent = 1 ; /* bytes */
131+ scount = packed_size / size ;
127132 }
128- } else if (!(vrank % 2 )) {
129- /* non-root, non-leaf nodes, allocate temp buffer for recv
130- * the most we need is rcount*size/2 */
131- ompi_datatype_type_extent (rdtype , & rextent );
132- rsize = opal_datatype_span (& rdtype -> super , (int64_t )rcount * size , & rgap );
133- tempbuf = (char * )malloc (rsize / 2 );
133+ curr_count = scount * size ;
134+ } else { /* (!(vrank % 2)) */
135+ opal_convertor_copy_and_prepare_for_send ( ompi_mpi_local_convertor , & (rdtype -> super ),
136+ rcount , NULL , 0 , & convertor );
137+ opal_convertor_get_packed_size ( & convertor , & packed_sizet );
138+ scount = (int )packed_sizet ;
139+
140+ sdtype = MPI_PACKED ; /* default to MPI_PACKED as the send type */
141+ packed_size = scount * (size + 1 )/2 ; /* non-root, non-leaf nodes, allocate temp buffer for recv
142+ * the most we need is rcount*size/2 */
143+ ptmp = tempbuf = (char * )malloc (packed_size );
134144 if (NULL == tempbuf ) {
135145 err = OMPI_ERR_OUT_OF_RESOURCE ; line = __LINE__ ; goto err_hndl ;
136146 }
137- ptmp = tempbuf - rgap ;
138- sdtype = rdtype ;
139- scount = rcount ;
140- sextent = rextent ;
141- }
142147
143- int curr_count = (rank == root ) ? scount * size : 0 ;
144- if (!(vrank % 2 )) {
145- if (rank != root ) {
146- /* recv from parent on non-root */
147- err = MCA_PML_CALL (recv (ptmp , (ptrdiff_t )rcount * (ptrdiff_t )size , rdtype , bmtree -> tree_prev ,
148- MCA_COLL_BASE_TAG_SCATTER , comm , & status ));
149- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
150-
151- /* Get received count */
152- size_t rdtype_size ;
153- ompi_datatype_type_size (rdtype , & rdtype_size );
154- curr_count = (int )(status ._ucount / rdtype_size );
155-
156- /* local copy to rbuf */
157- err = ompi_datatype_sndrcv (ptmp , scount , sdtype ,
158- rbuf , rcount , rdtype );
159- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
160- }
161- /* send to children on all non-leaf */
162- for (int i = bmtree -> tree_nextsize - 1 ; i >= 0 ; i -- ) {
163- /* figure out how much data I have to send to this child */
164- int vchild = (bmtree -> tree_next [i ] - root + size ) % size ;
165- int send_count = vchild - vrank ;
166- if (send_count > size - vchild )
167- send_count = size - vchild ;
168- send_count *= scount ;
169- err = MCA_PML_CALL (send (ptmp + (ptrdiff_t )(curr_count - send_count ) * sextent ,
170- send_count , sdtype , bmtree -> tree_next [i ],
171- MCA_COLL_BASE_TAG_SCATTER ,
172- MCA_PML_BASE_SEND_STANDARD , comm ));
173- if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
174- curr_count -= send_count ;
175- }
176- if (NULL != tempbuf )
177- free (tempbuf );
178- } else {
179- /* recv from parent on leaf nodes */
180- err = MCA_PML_CALL (recv (ptmp , rcount , rdtype , bmtree -> tree_prev ,
148+ /* recv from parent on non-root */
149+ err = MCA_PML_CALL (recv (ptmp , (ptrdiff_t )packed_size , MPI_PACKED , bmtree -> tree_prev ,
181150 MCA_COLL_BASE_TAG_SCATTER , comm , & status ));
182151 if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
152+
153+ /* Get received count */
154+ curr_count = (int )status ._ucount ; /* no need for conversion, work in bytes */
155+ sextent = 1 ; /* bytes */
183156 }
184157
158+ if (rbuf != MPI_IN_PLACE ) { /* local copy to rbuf */
159+ err = ompi_datatype_sndrcv (ptmp , scount , sdtype ,
160+ rbuf , rcount , rdtype );
161+ if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
162+ }
163+
164+ /* send to children on all non-leaf */
165+ for (int i = bmtree -> tree_nextsize - 1 ; i >= 0 ; i -- ) {
166+ /* figure out how much data I have to send to this child */
167+ int vchild = (bmtree -> tree_next [i ] - root + size ) % size ;
168+ int send_count = vchild - vrank ;
169+ if (send_count > size - vchild )
170+ send_count = size - vchild ;
171+ send_count *= scount ;
172+
173+ err = MCA_PML_CALL (send (ptmp + (ptrdiff_t )(curr_count - send_count ) * sextent ,
174+ send_count , sdtype , bmtree -> tree_next [i ],
175+ MCA_COLL_BASE_TAG_SCATTER ,
176+ MCA_PML_BASE_SEND_STANDARD , comm ));
177+ if (MPI_SUCCESS != err ) { line = __LINE__ ; goto err_hndl ; }
178+ curr_count -= send_count ;
179+ }
180+ if (NULL != tempbuf )
181+ free (tempbuf );
182+
185183 return MPI_SUCCESS ;
186184
187185 err_hndl :
0 commit comments