22/*
33 * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
44 * reserved.
5- * Copyright (c) 2016-2017 Research Organization for Information Science
6- * and Technology (RIST). All rights reserved.
5+ * Copyright (c) 2016-2019 Research Organization for Information Science
6+ * and Technology (RIST). All rights reserved.
77 * Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
8+ * Copyright (c) 2019 Triad National Security, LLC. All rights
9+ * reserved.
810 * $COPYRIGHT$
911 *
1012 * Additional copyrights may follow
@@ -50,71 +52,6 @@ struct ompi_osc_rdma_event_t {
5052
5153typedef struct ompi_osc_rdma_event_t ompi_osc_rdma_event_t ;
5254
53- #if 0
54- static void * ompi_osc_rdma_event_put (int fd , int flags , void * context )
55- {
56- ompi_osc_rdma_event_t * event = (ompi_osc_rdma_event_t * ) context ;
57- int ret ;
58-
59- ret = event -> module -> selected_btl -> btl_put (event -> module -> selected_btl , event -> endpoint , event -> local_address ,
60- event -> remote_address , event -> local_handle , event -> remote_handle ,
61- event -> length , 0 , MCA_BTL_NO_ORDER , event -> cbfunc , event -> cbcontext ,
62- event -> cbdata );
63- if (OPAL_LIKELY (OPAL_SUCCESS == ret )) {
64- /* done with this event */
65- opal_event_del (& event -> super );
66- free (event );
67- } else {
68- /* re-activate the event */
69- opal_event_active (& event -> super , OPAL_EV_READ , 1 );
70- }
71-
72- return NULL ;
73- }
74-
75- static int ompi_osc_rdma_event_queue (ompi_osc_rdma_module_t * module , struct mca_btl_base_endpoint_t * endpoint ,
76- ompi_osc_rdma_event_type_t event_type , void * local_address , mca_btl_base_registration_handle_t * local_handle ,
77- uint64_t remote_address , mca_btl_base_registration_handle_t * remote_handle ,
78- uint64_t length , mca_btl_base_rdma_completion_fn_t cbfunc , void * cbcontext ,
79- void * cbdata )
80- {
81- ompi_osc_rdma_event_t * event = malloc (sizeof (* event ));
82- void * (* event_func ) (int , int , void * );
83-
84- OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "queueing event type %d" , event_type );
85-
86- if (OPAL_UNLIKELY (NULL == event )) {
87- return OMPI_ERR_OUT_OF_RESOURCE ;
88- }
89-
90- event -> module = module ;
91- event -> endpoint = endpoint ;
92- event -> local_address = local_address ;
93- event -> local_handle = local_handle ;
94- event -> remote_address = remote_address ;
95- event -> remote_handle = remote_handle ;
96- event -> length = length ;
97- event -> cbfunc = cbfunc ;
98- event -> cbcontext = cbcontext ;
99- event -> cbdata = cbdata ;
100-
101- switch (event_type ) {
102- case OMPI_OSC_RDMA_EVENT_TYPE_PUT :
103- event_func = ompi_osc_rdma_event_put ;
104- break ;
105- default :
106- opal_output (0 , "osc/rdma: cannot queue unknown event type %d" , event_type );
107- abort ();
108- }
109-
110- opal_event_set (opal_sync_event_base , & event -> super , -1 , OPAL_EV_READ ,
111- event_func , event );
112- opal_event_active (& event -> super , OPAL_EV_READ , 1 );
113-
114- return OMPI_SUCCESS ;
115- }
116- #endif
117-
11855static int ompi_osc_rdma_gacc_local (const void * source_buffer , int source_count , ompi_datatype_t * source_datatype ,
11956 void * result_buffer , int result_count , ompi_datatype_t * result_datatype ,
12057 ompi_osc_rdma_peer_t * peer , uint64_t target_address ,
@@ -127,7 +64,7 @@ static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count
12764 do {
12865 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "performing accumulate with local region(s)" );
12966
130- if (NULL != result_buffer ) {
67+ if (NULL != result_datatype ) {
13168 /* get accumulate */
13269
13370 ret = ompi_datatype_sndrcv ((void * ) (intptr_t ) target_address , target_count , target_datatype ,
@@ -184,7 +121,8 @@ static inline int ompi_osc_rdma_cas_local (const void *source_addr, const void *
184121
185122static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t * sync , const void * source , int source_count ,
186123 ompi_datatype_t * source_datatype , void * result , int result_count ,
187- ompi_datatype_t * result_datatype , ompi_osc_rdma_peer_t * peer , uint64_t target_address ,
124+ ompi_datatype_t * result_datatype , opal_convertor_t * result_convertor ,
125+ ompi_osc_rdma_peer_t * peer , uint64_t target_address ,
188126 mca_btl_base_registration_handle_t * target_handle , int target_count ,
189127 ompi_datatype_t * target_datatype , ompi_op_t * op , ompi_osc_rdma_request_t * request )
190128{
@@ -219,8 +157,7 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
219157 uint32_t iov_count = 1 ;
220158 size_t size = request -> len ;
221159
222- opal_convertor_unpack (& request -> convertor , & iov , & iov_count , & size );
223- opal_convertor_cleanup (& request -> convertor );
160+ opal_convertor_unpack (result_convertor , & iov , & iov_count , & size );
224161 } else {
225162 /* copy contiguous data to the result buffer */
226163 ompi_datatype_sndrcv (ptr , len , MPI_BYTE , result , result_count , result_datatype );
@@ -264,7 +201,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
264201 struct iovec source_iovec [OMPI_OSC_RDMA_DECODE_MAX ], target_iovec [OMPI_OSC_RDMA_DECODE_MAX ];
265202 const size_t acc_limit = (mca_osc_rdma_component .buffer_size >> 3 );
266203 uint32_t source_primitive_count , target_primitive_count ;
267- opal_convertor_t source_convertor , target_convertor ;
204+ opal_convertor_t source_convertor , target_convertor , result_convertor ;
268205 uint32_t source_iov_count , target_iov_count ;
269206 uint32_t source_iov_index , target_iov_index ;
270207 ompi_datatype_t * source_primitive , * target_primitive ;
@@ -281,6 +218,13 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
281218 request -> internal = true;
282219 }
283220
221+ if (& ompi_mpi_op_no_op .op == op ) {
222+ /* NTH: just zero these out to catch any coding errors (they should be ignored in the no-op case) */
223+ source_count = 0 ;
224+ source_datatype = NULL ;
225+ source_addr = NULL ;
226+ }
227+
284228 request -> cleanup = ompi_osc_rdma_gacc_master_cleanup ;
285229 request -> type = result_datatype ? OMPI_OSC_RDMA_TYPE_GET_ACC : OMPI_OSC_RDMA_TYPE_ACC ;
286230
@@ -303,7 +247,7 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
303247 }
304248
305249 ret = ompi_osc_rdma_gacc_contig (sync , source_addr , source_count , source_datatype , result_addr ,
306- result_count , result_datatype , peer , target_address ,
250+ result_count , result_datatype , NULL , peer , target_address ,
307251 target_handle , target_count , target_datatype , op ,
308252 request );
309253 if (OPAL_LIKELY (OMPI_SUCCESS == ret )) {
@@ -357,6 +301,20 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
357301 if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
358302 return ret ;
359303 }
304+ source_iov_count = 0 ;
305+ } else {
306+ source_iovec [0 ].iov_len = (size_t ) -1 ;
307+ source_iovec [0 ].iov_base = NULL ;
308+ source_iov_count = 1 ;
309+ }
310+
311+ if (result_datatype ) {
312+ OBJ_CONSTRUCT (& result_convertor , opal_convertor_t );
313+ ret = opal_convertor_copy_and_prepare_for_recv (ompi_mpi_local_convertor , & result_datatype -> super , result_count , result_addr ,
314+ 0 , & result_convertor );
315+ if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
316+ return ret ;
317+ }
360318 }
361319
362320 /* target_datatype can never be NULL */
@@ -372,59 +330,42 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
372330
373331 target_iov_index = 0 ;
374332 target_iov_count = 0 ;
333+ source_iov_index = 0 ;
375334 result_position = 0 ;
376335 subreq = NULL ;
377336
378337 do {
379- /* decode segments of the source data */
380- source_iov_count = OMPI_OSC_RDMA_DECODE_MAX ;
381- source_iov_index = 0 ;
382- /* opal_convertor_raw returns done when it has reached the end of the data */
383- if (!source_datatype ) {
384- done = true;
385- source_iovec [0 ].iov_len = (size_t ) -1 ;
386- source_iovec [0 ].iov_base = NULL ;
387- source_iov_count = 1 ;
388- } else {
389- done = opal_convertor_raw (& source_convertor , source_iovec , & source_iov_count , & source_size );
390- }
391-
392- /* loop on the target segments until we have exhaused the decoded source data */
393- while (source_iov_index != source_iov_count ) {
394- if (target_iov_index == target_iov_count ) {
395- /* decode segments of the target buffer */
396- target_iov_count = OMPI_OSC_RDMA_DECODE_MAX ;
397- target_iov_index = 0 ;
398- (void ) opal_convertor_raw (& target_convertor , target_iovec , & target_iov_count , & target_size );
338+ /* decode segments of the target buffer */
339+ target_iov_count = OMPI_OSC_RDMA_DECODE_MAX ;
340+ target_iov_index = 0 ;
341+ done = opal_convertor_raw (& target_convertor , target_iovec , & target_iov_count , & target_size );
342+
343+ /* loop on the source segments (if any) until we have exhaused the decoded target data */
344+ while (target_iov_index != target_iov_count ) {
345+ if (source_iov_count == source_iov_index ) {
346+ /* decode segments of the source data */
347+ source_iov_count = OMPI_OSC_RDMA_DECODE_MAX ;
348+ source_iov_index = 0 ;
349+ (void ) opal_convertor_raw (& source_convertor , source_iovec , & source_iov_count , & source_size );
399350 }
400351
401352 /* we already checked that the target was large enough. this should be impossible */
402353 assert (0 != target_iov_count );
403354
404355 /* determine how much to put in this operation */
405- acc_len = min (target_iovec [target_iov_index ].iov_len , source_iovec [source_iov_index ].iov_len );
406- acc_len = min ((size_t ) acc_len , acc_limit );
356+ acc_len = min (min (target_iovec [target_iov_index ].iov_len , source_iovec [source_iov_index ].iov_len ), acc_limit );
407357
408- /* execute the get */
358+ /* execute the get-accumulate */
409359 if (!subreq ) {
410360 OMPI_OSC_RDMA_REQUEST_ALLOC (module , peer , subreq );
411361 subreq -> internal = true;
412362 subreq -> parent_request = request ;
363+ subreq -> type = result_datatype ? OMPI_OSC_RDMA_TYPE_GET_ACC : OMPI_OSC_RDMA_TYPE_ACC ;
413364 (void ) OPAL_THREAD_ADD_FETCH32 (& request -> outstanding_requests , 1 );
414365 }
415366
416- if (result_datatype ) {
417- /* prepare a convertor for this part of the result */
418- opal_convertor_copy_and_prepare_for_recv (ompi_mpi_local_convertor , & result_datatype -> super , result_count ,
419- result_addr , 0 , & subreq -> convertor );
420- opal_convertor_set_position (& subreq -> convertor , & result_position );
421- subreq -> type = OMPI_OSC_RDMA_TYPE_GET_ACC ;
422- } else {
423- subreq -> type = OMPI_OSC_RDMA_TYPE_ACC ;
424- }
425-
426367 ret = ompi_osc_rdma_gacc_contig (sync , source_iovec [source_iov_index ].iov_base , acc_len / target_primitive -> super .size ,
427- target_primitive , NULL , 0 , NULL , peer ,
368+ target_primitive , NULL , 0 , NULL , & result_convertor , peer ,
428369 (uint64_t ) (intptr_t ) target_iovec [target_iov_index ].iov_base , target_handle ,
429370 acc_len / target_primitive -> super .size , target_primitive , op , subreq );
430371 if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
@@ -444,13 +385,16 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
444385
445386 /* adjust io vectors */
446387 target_iovec [target_iov_index ].iov_len -= acc_len ;
447- source_iovec [source_iov_index ].iov_len -= acc_len ;
448388 target_iovec [target_iov_index ].iov_base = (void * )((intptr_t ) target_iovec [target_iov_index ].iov_base + acc_len );
449- source_iovec [source_iov_index ].iov_base = (void * )((intptr_t ) source_iovec [source_iov_index ].iov_base + acc_len );
389+ target_iov_index += (0 == target_iovec [target_iov_index ].iov_len );
390+
450391 result_position += acc_len ;
451392
452- source_iov_index += !source_datatype || (0 == source_iovec [source_iov_index ].iov_len );
453- target_iov_index += (0 == target_iovec [target_iov_index ].iov_len );
393+ if (source_datatype ) {
394+ source_iov_index += (0 == source_iovec [source_iov_index ].iov_len );
395+ source_iovec [source_iov_index ].iov_len -= acc_len ;
396+ source_iovec [source_iov_index ].iov_base = (void * )((intptr_t ) source_iovec [source_iov_index ].iov_base + acc_len );
397+ }
454398 }
455399 } while (!done );
456400
@@ -462,6 +406,11 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
462406 OBJ_DESTRUCT (& source_convertor );
463407 }
464408
409+ if (result_datatype ) {
410+ opal_convertor_cleanup (& result_convertor );
411+ OBJ_DESTRUCT (& result_convertor );
412+ }
413+
465414 OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "finished scheduling rdma on non-contiguous datatype(s)" );
466415
467416 opal_convertor_cleanup (& target_convertor );
@@ -587,9 +536,9 @@ static int ompi_osc_rdma_fetch_and_op_cas (ompi_osc_rdma_sync_t *sync, const voi
587536 new_value = old_value ;
588537
589538 if (& ompi_mpi_op_replace .op == op ) {
590- memcpy ((void * )((intptr_t ) & new_value + offset ), origin_addr , extent );
539+ memcpy ((void * )((intptr_t ) & new_value + offset ), origin_addr + dt -> super . true_lb , extent );
591540 } else if (& ompi_mpi_op_no_op .op != op ) {
592- ompi_op_reduce (op , (void * ) origin_addr , (void * )((intptr_t ) & new_value + offset ), 1 , dt );
541+ ompi_op_reduce (op , (void * ) origin_addr + dt -> super . true_lb , (void * )((intptr_t ) & new_value + offset ), 1 , dt );
593542 }
594543
595544 ret = ompi_osc_rdma_btl_cswap (module , peer -> data_endpoint , address , target_handle ,
@@ -854,7 +803,7 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
854803 ompi_osc_rdma_module_t * module = sync -> module ;
855804 mca_btl_base_registration_handle_t * target_handle ;
856805 uint64_t target_address ;
857- ptrdiff_t lb , origin_extent , target_span ;
806+ ptrdiff_t lb , target_lb , origin_extent , target_span ;
858807 bool lock_acquired = false;
859808 int ret ;
860809
@@ -867,11 +816,11 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
867816 return OMPI_SUCCESS ;
868817 }
869818
870- target_span = opal_datatype_span (& target_datatype -> super , target_count , & lb );
819+ target_span = opal_datatype_span (& target_datatype -> super , target_count , & target_lb );
871820
872821 // a buffer defined by (buf, count, dt)
873822 // will have data starting at buf+offset and ending len bytes later:
874- ret = osc_rdma_get_remote_segment (module , peer , target_disp , target_span + lb , & target_address , & target_handle );
823+ ret = osc_rdma_get_remote_segment (module , peer , target_disp , target_span + target_lb , & target_address , & target_handle );
875824 if (OPAL_UNLIKELY (OMPI_SUCCESS != ret )) {
876825 return ret ;
877826 }
@@ -895,10 +844,10 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
895844 if (origin_extent <= 8 && 1 == origin_count && !ompi_osc_rdma_peer_local_base (peer )) {
896845 if (module -> acc_use_amo && ompi_datatype_is_predefined (origin_datatype )) {
897846 if (NULL == result_addr ) {
898- ret = ompi_osc_rdma_acc_single_atomic (sync , origin_addr , origin_datatype , origin_extent , peer , target_address ,
847+ ret = ompi_osc_rdma_acc_single_atomic (sync , origin_addr , origin_datatype , origin_extent , peer , target_address + target_lb ,
899848 target_handle , op , request , lock_acquired );
900849 } else {
901- ret = ompi_osc_rdma_fetch_and_op_atomic (sync , origin_addr , result_addr , origin_datatype , origin_extent , peer , target_address ,
850+ ret = ompi_osc_rdma_fetch_and_op_atomic (sync , origin_addr , result_addr , origin_datatype , origin_extent , peer , target_address + target_lb ,
902851 target_handle , op , request , lock_acquired );
903852 }
904853
@@ -907,7 +856,7 @@ int ompi_osc_rdma_rget_accumulate_internal (ompi_osc_rdma_sync_t *sync, const vo
907856 }
908857 }
909858
910- ret = ompi_osc_rdma_fetch_and_op_cas (sync , origin_addr , result_addr , origin_datatype , origin_extent , peer , target_address ,
859+ ret = ompi_osc_rdma_fetch_and_op_cas (sync , origin_addr , result_addr , origin_datatype , origin_extent , peer , target_address + target_lb ,
911860 target_handle , op , request , lock_acquired );
912861 if (OMPI_SUCCESS == ret ) {
913862 return OMPI_SUCCESS ;
0 commit comments