Skip to content

handle errors gracefully to prevent SEGV #13238

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 7, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions ompi/mca/coll/ucc/coll_ucc_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2021 Mellanox Technologies. All rights reserved.
* Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
* All Rights reserved.
* Copyright (c) 2022-2024 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2022-2025 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2024 Triad National Security, LLC. All rights reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -150,7 +150,7 @@ static ucc_status_t oob_allgather_test(void *req)
size_t msglen = oob_req->msglen;
int probe_count = 5;
int rank, size, sendto, recvfrom, recvdatafrom,
senddatafrom, completed, probe;
senddatafrom, completed, probe, rc;

size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
Expand All @@ -175,10 +175,16 @@ static ucc_status_t oob_allgather_test(void *req)
senddatafrom = (rank - oob_req->iter + size) % size;
tmprecv = (char*)oob_req->rbuf + (ptrdiff_t)recvdatafrom * (ptrdiff_t)msglen;
tmpsend = (char*)oob_req->rbuf + (ptrdiff_t)senddatafrom * (ptrdiff_t)msglen;
MCA_PML_CALL(isend(tmpsend, msglen, MPI_BYTE, sendto, MCA_COLL_BASE_TAG_UCC,
rc = MCA_PML_CALL(isend(tmpsend, msglen, MPI_BYTE, sendto, MCA_COLL_BASE_TAG_UCC,
MCA_PML_BASE_SEND_STANDARD, comm, &oob_req->reqs[0]));
MCA_PML_CALL(irecv(tmprecv, msglen, MPI_BYTE, recvfrom,
if (OMPI_SUCCESS != rc) {
return UCC_ERR_NO_MESSAGE;
}
rc = MCA_PML_CALL(irecv(tmprecv, msglen, MPI_BYTE, recvfrom,
MCA_COLL_BASE_TAG_UCC, comm, &oob_req->reqs[1]));
if (OMPI_SUCCESS != rc) {
return UCC_ERR_NO_MESSAGE;
}
}
probe = 0;
do {
Expand Down Expand Up @@ -206,6 +212,8 @@ static ucc_status_t oob_allgather(void *sbuf, void *rbuf, size_t msglen,
oob_req->msglen = msglen;
oob_req->oob_coll_ctx = oob_coll_ctx;
oob_req->iter = 0;
oob_req->reqs[0] = MPI_REQUEST_NULL;
oob_req->reqs[1] = MPI_REQUEST_NULL;
*req = oob_req;
return UCC_OK;
}
Expand Down