Merge pull request #8138 from rajachan/peek-probe-errorflow-41x
[v4.1.x] mtl/ofi: Fix erroneous FI_PEEK/FI_CLAIM usage
Этот коммит содержится в:
Коммит
03e10c5dc9
@ -996,10 +996,20 @@ __opal_attribute_always_inline__ static inline int
|
||||
ompi_mtl_ofi_probe_error_callback(struct fi_cq_err_entry *error,
|
||||
ompi_mtl_ofi_request_t *ofi_req)
|
||||
{
|
||||
ofi_req->status.MPI_ERROR = MPI_ERR_INTERN;
|
||||
ofi_req->completion_count--;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
/*
|
||||
* Receives posted with FI_PEEK and friends will get an error
|
||||
* completion with FI_ENOMSG. This just indicates the lack of a match for
|
||||
* the probe and is not an error case. All other error cases are
|
||||
* provider-internal errors and should be flagged as such.
|
||||
*/
|
||||
if (error->err == FI_ENOMSG)
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
ofi_req->status.MPI_ERROR = MPI_ERR_INTERN;
|
||||
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
__opal_attribute_always_inline__ static inline int
|
||||
@ -1044,7 +1054,6 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl,
|
||||
/**
|
||||
* fi_trecvmsg with FI_PEEK:
|
||||
* Initiate a search for a match in the hardware or software queue.
|
||||
* The search can complete immediately with -ENOMSG.
|
||||
* If successful, libfabric will enqueue a context entry into the completion
|
||||
* queue to make the search nonblocking. This code will poll until the
|
||||
* entry is enqueued.
|
||||
@ -1065,13 +1074,7 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl,
|
||||
ofi_req.match_state = 0;
|
||||
|
||||
MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep, &msg, msgflags), ret);
|
||||
if (-FI_ENOMSG == ret) {
|
||||
/**
|
||||
* The search request completed but no matching message was found.
|
||||
*/
|
||||
*flag = 0;
|
||||
return OMPI_SUCCESS;
|
||||
} else if (OPAL_UNLIKELY(0 > ret)) {
|
||||
if (OPAL_UNLIKELY(0 > ret)) {
|
||||
MTL_OFI_LOG_FI_ERR(ret, "fi_trecvmsg failed");
|
||||
return ompi_mtl_ofi_get_error(ret);
|
||||
}
|
||||
@ -1141,7 +1144,6 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl,
|
||||
/**
|
||||
* fi_trecvmsg with FI_PEEK and FI_CLAIM:
|
||||
* Initiate a search for a match in the hardware or software queue.
|
||||
* The search can complete immediately with -ENOMSG.
|
||||
* If successful, libfabric will enqueue a context entry into the completion
|
||||
* queue to make the search nonblocking. This code will poll until the
|
||||
* entry is enqueued.
|
||||
@ -1163,14 +1165,7 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl,
|
||||
ofi_req->mask_bits = mask_bits;
|
||||
|
||||
MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep, &msg, msgflags), ret);
|
||||
if (-FI_ENOMSG == ret) {
|
||||
/**
|
||||
* The search request completed but no matching message was found.
|
||||
*/
|
||||
*matched = 0;
|
||||
free(ofi_req);
|
||||
return OMPI_SUCCESS;
|
||||
} else if (OPAL_UNLIKELY(0 > ret)) {
|
||||
if (OPAL_UNLIKELY(0 > ret)) {
|
||||
MTL_OFI_LOG_FI_ERR(ret, "fi_trecvmsg failed");
|
||||
free(ofi_req);
|
||||
return ompi_mtl_ofi_get_error(ret);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user