PML/UCX: improved error processing in MPI_Recv
- improved error processing in MPI_Recv implementation of pml UCX
- added error handling for pml_ucx_mrecv call

Signed-off-by: Sergey Oblomov <sergeyo@nvidia.com>
This commit is contained in:
parent
487bbf31ba
Commit
eb9405d53f
@ -611,6 +611,7 @@ int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src
|
||||
ucp_tag_t ucp_tag, ucp_tag_mask;
|
||||
ucp_tag_recv_info_t info;
|
||||
ucs_status_t status;
|
||||
int result;
|
||||
|
||||
PML_UCX_TRACE_RECV("%s", buf, count, datatype, src, tag, comm, "recv");
|
||||
|
||||
@ -627,7 +628,7 @@ int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src
|
||||
MCA_COMMON_UCX_PROGRESS_LOOP(ompi_pml_ucx.ucp_worker) {
|
||||
status = ucp_request_test(req, &info);
|
||||
if (status != UCS_INPROGRESS) {
|
||||
mca_pml_ucx_set_recv_status_safe(mpi_status, status, &info);
|
||||
result = mca_pml_ucx_set_recv_status_safe(mpi_status, status, &info);
|
||||
|
||||
#if SPC_ENABLE == 1
|
||||
size_t dt_size;
|
||||
@ -635,7 +636,7 @@ int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src
|
||||
SPC_USER_OR_MPI(tag, dt_size*count,
|
||||
OMPI_SPC_BYTES_RECEIVED_USER, OMPI_SPC_BYTES_RECEIVED_MPI);
|
||||
#endif
|
||||
return OMPI_SUCCESS;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1093,8 +1094,7 @@ int mca_pml_ucx_mrecv(void *buf, size_t count, ompi_datatype_t *datatype,
|
||||
|
||||
PML_UCX_MESSAGE_RELEASE(message);
|
||||
|
||||
ompi_request_wait(&req, status);
|
||||
return OMPI_SUCCESS;
|
||||
return ompi_request_wait(&req, status);
|
||||
}
|
||||
|
||||
int mca_pml_ucx_start(size_t count, ompi_request_t** requests)
|
||||
|
@ -165,7 +165,7 @@ static inline void mca_pml_ucx_set_send_status(ompi_status_public_t* mpi_status,
|
||||
}
|
||||
}
|
||||
|
||||
static inline void mca_pml_ucx_set_recv_status(ompi_status_public_t* mpi_status,
|
||||
static inline int mca_pml_ucx_set_recv_status(ompi_status_public_t* mpi_status,
|
||||
ucs_status_t ucp_status,
|
||||
const ucp_tag_recv_info_t *info)
|
||||
{
|
||||
@ -186,15 +186,23 @@ static inline void mca_pml_ucx_set_recv_status(ompi_status_public_t* mpi_status,
|
||||
} else {
|
||||
mpi_status->MPI_ERROR = MPI_ERR_INTERN;
|
||||
}
|
||||
|
||||
return mpi_status->MPI_ERROR;
|
||||
}
|
||||
|
||||
static inline void mca_pml_ucx_set_recv_status_safe(ompi_status_public_t* mpi_status,
|
||||
ucs_status_t ucp_status,
|
||||
const ucp_tag_recv_info_t *info)
|
||||
static inline int mca_pml_ucx_set_recv_status_safe(ompi_status_public_t* mpi_status,
|
||||
ucs_status_t ucp_status,
|
||||
const ucp_tag_recv_info_t *info)
|
||||
{
|
||||
if (mpi_status != MPI_STATUS_IGNORE) {
|
||||
mca_pml_ucx_set_recv_status(mpi_status, ucp_status, info);
|
||||
return mca_pml_ucx_set_recv_status(mpi_status, ucp_status, info);
|
||||
} else if (OPAL_LIKELY(ucp_status == UCS_OK) || (ucp_status == UCS_ERR_CANCELED)) {
|
||||
return UCS_OK;
|
||||
} else if (ucp_status == UCS_ERR_MESSAGE_TRUNCATED) {
|
||||
return MPI_ERR_TRUNCATE;
|
||||
}
|
||||
|
||||
return MPI_ERR_INTERN;
|
||||
}
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_pml_ucx_persistent_request_t);
|
||||
|
Loading…
x
Reference in new issue
Block a user