@@ -1136,24 +1136,31 @@ int ompi_osc_ucx_rput(const void *origin_addr, int origin_count,
11361136 ret = ompi_osc_ucx_put (origin_addr , origin_count , origin_dt , target , target_disp ,
11371137 target_count , target_dt , win );
11381138 if (ret != OMPI_SUCCESS ) {
1139+ OMPI_OSC_UCX_REQUEST_RETURN (ucx_req );
11391140 return ret ;
11401141 }
11411142
1142- ret = opal_common_ucx_wpmem_fence (mem );
1143- if (ret != OMPI_SUCCESS ) {
1144- OSC_UCX_VERBOSE (1 , "opal_common_ucx_mem_fence failed: %d" , ret );
1145- return OMPI_ERROR ;
1146- }
1147-
11481143 mca_osc_ucx_component .num_incomplete_req_ops ++ ;
1149- /* TODO: investigate whether ucp_worker_flush_nb is a better choice here */
1150- ret = opal_common_ucx_wpmem_fetch_nb (module -> state_mem , UCP_ATOMIC_FETCH_OP_FADD ,
1151- 0 , target , & (module -> req_result ),
1152- sizeof (uint64_t ), remote_addr & (~0x7 ),
1153- req_completion , ucx_req );
1144+ ret = opal_common_ucx_wpmem_flush_ep_nb (mem , target , req_completion , ucx_req );
1145+
11541146 if (ret != OMPI_SUCCESS ) {
1155- OMPI_OSC_UCX_REQUEST_RETURN (ucx_req );
1156- return ret ;
1147+
1148+ /* fallback to using an atomic op to acquire a request handle */
1149+ ret = opal_common_ucx_wpmem_fence (mem );
1150+ if (ret != OMPI_SUCCESS ) {
1151+ OSC_UCX_VERBOSE (1 , "opal_common_ucx_mem_fence failed: %d" , ret );
1152+ OMPI_OSC_UCX_REQUEST_RETURN (ucx_req );
1153+ return OMPI_ERROR ;
1154+ }
1155+
1156+ ret = opal_common_ucx_wpmem_fetch_nb (mem , UCP_ATOMIC_FETCH_OP_FADD ,
1157+ 0 , target , & (module -> req_result ),
1158+ sizeof (uint64_t ), remote_addr & (~0x7 ),
1159+ req_completion , ucx_req );
1160+ if (ret != OMPI_SUCCESS ) {
1161+ OMPI_OSC_UCX_REQUEST_RETURN (ucx_req );
1162+ return ret ;
1163+ }
11571164 }
11581165
11591166 * request = & ucx_req -> super ;
@@ -1191,24 +1198,31 @@ int ompi_osc_ucx_rget(void *origin_addr, int origin_count,
11911198 ret = ompi_osc_ucx_get (origin_addr , origin_count , origin_dt , target , target_disp ,
11921199 target_count , target_dt , win );
11931200 if (ret != OMPI_SUCCESS ) {
1201+ OMPI_OSC_UCX_REQUEST_RETURN (ucx_req );
11941202 return ret ;
11951203 }
11961204
1197- ret = opal_common_ucx_wpmem_fence (mem );
1198- if (ret != OMPI_SUCCESS ) {
1199- OSC_UCX_VERBOSE (1 , "opal_common_ucx_mem_fence failed: %d" , ret );
1200- return OMPI_ERROR ;
1201- }
1202-
12031205 mca_osc_ucx_component .num_incomplete_req_ops ++ ;
1204- /* TODO: investigate whether ucp_worker_flush_nb is a better choice here */
1205- ret = opal_common_ucx_wpmem_fetch_nb (module -> state_mem , UCP_ATOMIC_FETCH_OP_FADD ,
1206- 0 , target , & (module -> req_result ),
1207- sizeof (uint64_t ), remote_addr & (~0x7 ),
1208- req_completion , ucx_req );
1206+ ret = opal_common_ucx_wpmem_flush_ep_nb (mem , target , req_completion , ucx_req );
1207+
12091208 if (ret != OMPI_SUCCESS ) {
1210- OMPI_OSC_UCX_REQUEST_RETURN (ucx_req );
1211- return ret ;
1209+
1210+ /* fallback to using an atomic op to acquire a request handle */
1211+ ret = opal_common_ucx_wpmem_fence (mem );
1212+ if (ret != OMPI_SUCCESS ) {
1213+ OSC_UCX_VERBOSE (1 , "opal_common_ucx_mem_fence failed: %d" , ret );
1214+ OMPI_OSC_UCX_REQUEST_RETURN (ucx_req );
1215+ return OMPI_ERROR ;
1216+ }
1217+
1218+ ret = opal_common_ucx_wpmem_fetch_nb (mem , UCP_ATOMIC_FETCH_OP_FADD ,
1219+ 0 , target , & (module -> req_result ),
1220+ sizeof (uint64_t ), remote_addr & (~0x7 ),
1221+ req_completion , ucx_req );
1222+ if (ret != OMPI_SUCCESS ) {
1223+ OMPI_OSC_UCX_REQUEST_RETURN (ucx_req );
1224+ return ret ;
1225+ }
12121226 }
12131227
12141228 * request = & ucx_req -> super ;
0 commit comments