1
1

Merge pull request #8138 from rajachan/peek-probe-errorflow-41x

[v4.1.x] mtl/ofi: Fix erroneous FI_PEEK/FI_CLAIM usage
Этот коммит содержится в:
Brian Barrett 2020-10-26 16:38:59 -07:00 коммит произвёл GitHub
родитель 83f2ffae71 c55d3d3469
Коммит 03e10c5dc9
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23

Просмотреть файл

@ -996,10 +996,20 @@ __opal_attribute_always_inline__ static inline int
ompi_mtl_ofi_probe_error_callback(struct fi_cq_err_entry *error,
ompi_mtl_ofi_request_t *ofi_req)
{
ofi_req->status.MPI_ERROR = MPI_ERR_INTERN;
ofi_req->completion_count--;
return OMPI_SUCCESS;
/*
* Receives posted with FI_PEEK and friends will get an error
* completion with FI_ENOMSG. This just indicates the lack of a match for
* the probe and is not an error case. All other error cases are
* provider-internal errors and should be flagged as such.
*/
if (error->err == FI_ENOMSG)
return OMPI_SUCCESS;
ofi_req->status.MPI_ERROR = MPI_ERR_INTERN;
return OMPI_ERROR;
}
__opal_attribute_always_inline__ static inline int
@ -1044,7 +1054,6 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl,
/**
* fi_trecvmsg with FI_PEEK:
* Initiate a search for a match in the hardware or software queue.
* The search can complete immediately with -ENOMSG.
* If successful, libfabric will enqueue a context entry into the completion
* queue to make the search nonblocking. This code will poll until the
* entry is enqueued.
@ -1065,13 +1074,7 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl,
ofi_req.match_state = 0;
MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep, &msg, msgflags), ret);
if (-FI_ENOMSG == ret) {
/**
* The search request completed but no matching message was found.
*/
*flag = 0;
return OMPI_SUCCESS;
} else if (OPAL_UNLIKELY(0 > ret)) {
if (OPAL_UNLIKELY(0 > ret)) {
MTL_OFI_LOG_FI_ERR(ret, "fi_trecvmsg failed");
return ompi_mtl_ofi_get_error(ret);
}
@ -1141,7 +1144,6 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl,
/**
* fi_trecvmsg with FI_PEEK and FI_CLAIM:
* Initiate a search for a match in the hardware or software queue.
* The search can complete immediately with -ENOMSG.
* If successful, libfabric will enqueue a context entry into the completion
* queue to make the search nonblocking. This code will poll until the
* entry is enqueued.
@ -1163,14 +1165,7 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl,
ofi_req->mask_bits = mask_bits;
MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep, &msg, msgflags), ret);
if (-FI_ENOMSG == ret) {
/**
* The search request completed but no matching message was found.
*/
*matched = 0;
free(ofi_req);
return OMPI_SUCCESS;
} else if (OPAL_UNLIKELY(0 > ret)) {
if (OPAL_UNLIKELY(0 > ret)) {
MTL_OFI_LOG_FI_ERR(ret, "fi_trecvmsg failed");
free(ofi_req);
return ompi_mtl_ofi_get_error(ret);