Merge pull request #8138 from rajachan/peek-probe-errorflow-41x
[v4.1.x] mtl/ofi: Fix erroneous FI_PEEK/FI_CLAIM usage
Этот коммит содержится в:
Коммит
03e10c5dc9
@ -996,10 +996,20 @@ __opal_attribute_always_inline__ static inline int
|
|||||||
ompi_mtl_ofi_probe_error_callback(struct fi_cq_err_entry *error,
|
ompi_mtl_ofi_probe_error_callback(struct fi_cq_err_entry *error,
|
||||||
ompi_mtl_ofi_request_t *ofi_req)
|
ompi_mtl_ofi_request_t *ofi_req)
|
||||||
{
|
{
|
||||||
ofi_req->status.MPI_ERROR = MPI_ERR_INTERN;
|
|
||||||
ofi_req->completion_count--;
|
ofi_req->completion_count--;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Receives posted with FI_PEEK and friends will get an error
|
||||||
|
* completion with FI_ENOMSG. This just indicates the lack of a match for
|
||||||
|
* the probe and is not an error case. All other error cases are
|
||||||
|
* provider-internal errors and should be flagged as such.
|
||||||
|
*/
|
||||||
|
if (error->err == FI_ENOMSG)
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
|
||||||
|
ofi_req->status.MPI_ERROR = MPI_ERR_INTERN;
|
||||||
|
|
||||||
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
__opal_attribute_always_inline__ static inline int
|
__opal_attribute_always_inline__ static inline int
|
||||||
@ -1044,7 +1054,6 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl,
|
|||||||
/**
|
/**
|
||||||
* fi_trecvmsg with FI_PEEK:
|
* fi_trecvmsg with FI_PEEK:
|
||||||
* Initiate a search for a match in the hardware or software queue.
|
* Initiate a search for a match in the hardware or software queue.
|
||||||
* The search can complete immediately with -ENOMSG.
|
|
||||||
* If successful, libfabric will enqueue a context entry into the completion
|
* If successful, libfabric will enqueue a context entry into the completion
|
||||||
* queue to make the search nonblocking. This code will poll until the
|
* queue to make the search nonblocking. This code will poll until the
|
||||||
* entry is enqueued.
|
* entry is enqueued.
|
||||||
@ -1065,13 +1074,7 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl,
|
|||||||
ofi_req.match_state = 0;
|
ofi_req.match_state = 0;
|
||||||
|
|
||||||
MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep, &msg, msgflags), ret);
|
MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep, &msg, msgflags), ret);
|
||||||
if (-FI_ENOMSG == ret) {
|
if (OPAL_UNLIKELY(0 > ret)) {
|
||||||
/**
|
|
||||||
* The search request completed but no matching message was found.
|
|
||||||
*/
|
|
||||||
*flag = 0;
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
} else if (OPAL_UNLIKELY(0 > ret)) {
|
|
||||||
MTL_OFI_LOG_FI_ERR(ret, "fi_trecvmsg failed");
|
MTL_OFI_LOG_FI_ERR(ret, "fi_trecvmsg failed");
|
||||||
return ompi_mtl_ofi_get_error(ret);
|
return ompi_mtl_ofi_get_error(ret);
|
||||||
}
|
}
|
||||||
@ -1141,7 +1144,6 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl,
|
|||||||
/**
|
/**
|
||||||
* fi_trecvmsg with FI_PEEK and FI_CLAIM:
|
* fi_trecvmsg with FI_PEEK and FI_CLAIM:
|
||||||
* Initiate a search for a match in the hardware or software queue.
|
* Initiate a search for a match in the hardware or software queue.
|
||||||
* The search can complete immediately with -ENOMSG.
|
|
||||||
* If successful, libfabric will enqueue a context entry into the completion
|
* If successful, libfabric will enqueue a context entry into the completion
|
||||||
* queue to make the search nonblocking. This code will poll until the
|
* queue to make the search nonblocking. This code will poll until the
|
||||||
* entry is enqueued.
|
* entry is enqueued.
|
||||||
@ -1163,14 +1165,7 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl,
|
|||||||
ofi_req->mask_bits = mask_bits;
|
ofi_req->mask_bits = mask_bits;
|
||||||
|
|
||||||
MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep, &msg, msgflags), ret);
|
MTL_OFI_RETRY_UNTIL_DONE(fi_trecvmsg(ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep, &msg, msgflags), ret);
|
||||||
if (-FI_ENOMSG == ret) {
|
if (OPAL_UNLIKELY(0 > ret)) {
|
||||||
/**
|
|
||||||
* The search request completed but no matching message was found.
|
|
||||||
*/
|
|
||||||
*matched = 0;
|
|
||||||
free(ofi_req);
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
} else if (OPAL_UNLIKELY(0 > ret)) {
|
|
||||||
MTL_OFI_LOG_FI_ERR(ret, "fi_trecvmsg failed");
|
MTL_OFI_LOG_FI_ERR(ret, "fi_trecvmsg failed");
|
||||||
free(ofi_req);
|
free(ofi_req);
|
||||||
return ompi_mtl_ofi_get_error(ret);
|
return ompi_mtl_ofi_get_error(ret);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user