From 7c621acf1b2e218af36f8c68f5db4b3d05115a31 Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Fri, 31 Jan 2020 14:02:21 +0200 Subject: [PATCH] OPAL/UCX: enabling new API provided by UCX - added detection of new API into configuration - added tag_send call implemented using new API - added MPI_Send/MPI_Isend/MPI_Recv/MPI_Irecv implementations Signed-off-by: Sergey Oblomov (cherry picked from commit 75bda25ddbeea18bb001f367a712dc72592e1e58) --- config/ompi_check_ucx.m4 | 8 ++ ompi/mca/pml/ucx/pml_ucx.c | 115 +++++++++++++++++++++++----- ompi/mca/pml/ucx/pml_ucx_datatype.c | 92 ++++++++++++++++++---- ompi/mca/pml/ucx/pml_ucx_datatype.h | 55 +++++++++++-- ompi/mca/pml/ucx/pml_ucx_request.c | 46 ++++++++++- ompi/mca/pml/ucx/pml_ucx_request.h | 15 +++- 6 files changed, 286 insertions(+), 45 deletions(-) diff --git a/config/ompi_check_ucx.m4 b/config/ompi_check_ucx.m4 index 7f04ba3a52..667cc985a4 100644 --- a/config/ompi_check_ucx.m4 +++ b/config/ompi_check_ucx.m4 @@ -128,6 +128,14 @@ AC_DEFUN([OMPI_CHECK_UCX],[ [AC_DEFINE([HAVE_UCP_WORKER_ADDRESS_FLAGS], [1], [have worker address attribute])], [], [#include ]) + AC_CHECK_DECLS([ucp_tag_send_nbx, + ucp_tag_send_sync_nbx, + ucp_tag_recv_nbx], + [], [], + [#include ]) + AC_CHECK_TYPES([ucp_request_param_t], + [], [], + [[#include ]]) CPPFLAGS=$old_CPPFLAGS OPAL_SUMMARY_ADD([[Transports]],[[Open UCX]],[$1],[$ompi_check_ucx_happy])])]) diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c index 056d26538e..f7144ff50d 100644 --- a/ompi/mca/pml/ucx/pml_ucx.c +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -360,10 +360,10 @@ int mca_pml_ucx_cleanup(void) static ucp_ep_h mca_pml_ucx_add_proc_common(ompi_proc_t *proc) { + size_t addrlen = 0; ucp_ep_params_t ep_params; ucp_address_t *address; ucs_status_t status; - size_t addrlen; ucp_ep_h ep; int ret; @@ -431,6 +431,7 @@ int mca_pml_ucx_add_procs(struct ompi_proc_t **procs, size_t nprocs) return OMPI_SUCCESS; } +__opal_attribute_always_inline__ static inline ucp_ep_h mca_pml_ucx_get_ep(ompi_communicator_t *comm, int rank) { ucp_ep_h ep; @@ -556,6 +557,11 @@ int mca_pml_ucx_irecv(void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag, struct ompi_communicator_t* comm, struct ompi_request_t **request) { +#if HAVE_DECL_UCP_TAG_RECV_NBX + pml_ucx_datatype_t *op_data = mca_pml_ucx_get_op_data(datatype); + ucp_request_param_t *param = &op_data->op_param.recv; +#endif + ucp_tag_t ucp_tag, ucp_tag_mask; ompi_request_t *req; @@ -563,10 +569,16 @@ int mca_pml_ucx_irecv(void *buf, size_t count, ompi_datatype_t *datatype, (void*)request); PML_UCX_MAKE_RECV_TAG(ucp_tag, ucp_tag_mask, tag, src, comm); +#if HAVE_DECL_UCP_TAG_RECV_NBX + req = (ompi_request_t*)ucp_tag_recv_nbx(ompi_pml_ucx.ucp_worker, buf, + mca_pml_ucx_get_data_size(op_data, count), + ucp_tag, ucp_tag_mask, param); +#else req = (ompi_request_t*)ucp_tag_recv_nb(ompi_pml_ucx.ucp_worker, buf, count, mca_pml_ucx_get_datatype(datatype), ucp_tag, ucp_tag_mask, mca_pml_ucx_recv_completion); +#endif if (UCS_PTR_IS_ERR(req)) { PML_UCX_ERROR("ucx recv failed: %s", ucs_status_string(UCS_PTR_STATUS(req))); return OMPI_ERROR; @@ -582,20 +594,34 @@ int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src int tag, struct ompi_communicator_t* comm, ompi_status_public_t* mpi_status) { + /* coverity[bad_alloc_arithmetic] */ + void *req = PML_UCX_REQ_ALLOCA(); +#if HAVE_DECL_UCP_TAG_RECV_NBX + pml_ucx_datatype_t *op_data = mca_pml_ucx_get_op_data(datatype); + ucp_request_param_t *recv_param = &op_data->op_param.recv; + ucp_request_param_t param; + + param.op_attr_mask = UCP_OP_ATTR_FIELD_REQUEST | + (recv_param->op_attr_mask & UCP_OP_ATTR_FIELD_DATATYPE); + param.datatype = recv_param->datatype; + param.request = req; +#endif ucp_tag_t ucp_tag, ucp_tag_mask; ucp_tag_recv_info_t info; ucs_status_t status; - void *req; PML_UCX_TRACE_RECV("%s", buf, count, datatype, src, tag, comm, "recv"); - /* coverity[bad_alloc_arithmetic] */ PML_UCX_MAKE_RECV_TAG(ucp_tag, ucp_tag_mask, tag, src, comm); - req = PML_UCX_REQ_ALLOCA(); - status = ucp_tag_recv_nbr(ompi_pml_ucx.ucp_worker, buf, count, - mca_pml_ucx_get_datatype(datatype), - ucp_tag, ucp_tag_mask, req); - +#if HAVE_DECL_UCP_TAG_RECV_NBX + ucp_tag_recv_nbx(ompi_pml_ucx.ucp_worker, buf, + mca_pml_ucx_get_data_size(op_data, count), + ucp_tag, ucp_tag_mask, ¶m); +#else + ucp_tag_recv_nbr(ompi_pml_ucx.ucp_worker, buf, count, + mca_pml_ucx_get_datatype(datatype), + ucp_tag, ucp_tag_mask, req); +#endif MCA_COMMON_UCX_PROGRESS_LOOP(ompi_pml_ucx.ucp_worker) { status = ucp_request_test(req, &info); if (status != UCS_INPROGRESS) { @@ -605,6 +631,7 @@ int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src } } +__opal_attribute_always_inline__ static inline const char *mca_pml_ucx_send_mode_name(mca_pml_base_send_mode_t mode) { switch (mode) { @@ -726,6 +753,7 @@ mca_pml_ucx_bsend(ucp_ep_h ep, const void *buf, size_t count, return NULL; } +__opal_attribute_always_inline__ static inline ucs_status_ptr_t mca_pml_ucx_common_send(ucp_ep_h ep, const void *buf, size_t count, ompi_datatype_t *datatype, @@ -743,6 +771,32 @@ static inline ucs_status_ptr_t mca_pml_ucx_common_send(ucp_ep_h ep, const void * } } +#if HAVE_DECL_UCP_TAG_SEND_NBX +__opal_attribute_always_inline__ +static inline ucs_status_ptr_t +mca_pml_ucx_common_send_nbx(ucp_ep_h ep, const void *buf, + size_t count, + ompi_datatype_t *datatype, + ucp_tag_t tag, + mca_pml_base_send_mode_t mode, + ucp_request_param_t *param) +{ + pml_ucx_datatype_t *op_data = mca_pml_ucx_get_op_data(datatype); + + if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == mode)) { + return mca_pml_ucx_bsend(ep, buf, count, datatype, tag); + } else if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_SYNCHRONOUS == mode)) { + return ucp_tag_send_sync_nb(ep, buf, count, + mca_pml_ucx_get_datatype(datatype), tag, + (ucp_send_callback_t)param->cb.send); + } else { + return ucp_tag_send_nbx(ep, buf, + mca_pml_ucx_get_data_size(op_data, count), + tag, param); + } +} +#endif + int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag, mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, @@ -761,10 +815,16 @@ int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype, return OMPI_ERROR; } +#if HAVE_DECL_UCP_TAG_SEND_NBX + req = (ompi_request_t*)mca_pml_ucx_common_send_nbx(ep, buf, count, datatype, + PML_UCX_MAKE_SEND_TAG(tag, comm), mode, + &mca_pml_ucx_get_op_data(datatype)->op_param.send); +#else req = (ompi_request_t*)mca_pml_ucx_common_send(ep, buf, count, datatype, mca_pml_ucx_get_datatype(datatype), PML_UCX_MAKE_SEND_TAG(tag, comm), mode, mca_pml_ucx_send_completion); +#endif if (req == NULL) { PML_UCX_VERBOSE(8, "returning completed request"); @@ -806,20 +866,40 @@ mca_pml_ucx_send_nb(ucp_ep_h ep, const void *buf, size_t count, #if HAVE_DECL_UCP_TAG_SEND_NBR static inline __opal_attribute_always_inline__ int mca_pml_ucx_send_nbr(ucp_ep_h ep, const void *buf, size_t count, - ucp_datatype_t ucx_datatype, ucp_tag_t tag) - + ompi_datatype_t *datatype, ucp_tag_t tag) { - ucs_status_ptr_t req; - ucs_status_t status; - /* coverity[bad_alloc_arithmetic] */ - req = PML_UCX_REQ_ALLOCA(); - status = ucp_tag_send_nbr(ep, buf, count, ucx_datatype, tag, req); + ucs_status_ptr_t req = PML_UCX_REQ_ALLOCA(); +#if HAVE_DECL_UCP_TAG_SEND_NBX + pml_ucx_datatype_t *op_data = mca_pml_ucx_get_op_data(datatype); + ucp_request_param_t param = { + .op_attr_mask = UCP_OP_ATTR_FIELD_REQUEST | + (op_data->op_param.send.op_attr_mask & UCP_OP_ATTR_FIELD_DATATYPE) | + UCP_OP_ATTR_FLAG_FAST_CMPL, + .datatype = op_data->op_param.send.datatype, + .request = req + }; + + req = ucp_tag_send_nbx(ep, buf, + mca_pml_ucx_get_data_size(op_data, count), + tag, ¶m); + if (OPAL_LIKELY(req == UCS_OK)) { + return OMPI_SUCCESS; + } else if (UCS_PTR_IS_ERR(req)) { + PML_UCX_ERROR("%s failed: %d, %s", __func__, UCS_PTR_STATUS(req), + ucs_status_string(UCS_PTR_STATUS(req))); + return OPAL_ERROR; + } +#else + ucs_status_t status; + status = ucp_tag_send_nbr(ep, buf, count, + mca_pml_ucx_get_datatype(datatype), tag, req); if (OPAL_LIKELY(status == UCS_OK)) { return OMPI_SUCCESS; } +#endif - MCA_COMMON_UCX_WAIT_LOOP(req, ompi_pml_ucx.ucp_worker, "ucx send", (void)0); + MCA_COMMON_UCX_WAIT_LOOP(req, ompi_pml_ucx.ucp_worker, "ucx send nbr", (void)0); } #endif @@ -840,8 +920,7 @@ int mca_pml_ucx_send(const void *buf, size_t count, ompi_datatype_t *datatype, i #if HAVE_DECL_UCP_TAG_SEND_NBR if (OPAL_LIKELY((MCA_PML_BASE_SEND_BUFFERED != mode) && (MCA_PML_BASE_SEND_SYNCHRONOUS != mode))) { - return mca_pml_ucx_send_nbr(ep, buf, count, - mca_pml_ucx_get_datatype(datatype), + return mca_pml_ucx_send_nbr(ep, buf, count, datatype, PML_UCX_MAKE_SEND_TAG(tag, comm)); } #endif diff --git a/ompi/mca/pml/ucx/pml_ucx_datatype.c b/ompi/mca/pml/ucx/pml_ucx_datatype.c index 95f9da44cc..43ca2ce9eb 100644 --- a/ompi/mca/pml/ucx/pml_ucx_datatype.c +++ b/ompi/mca/pml/ucx/pml_ucx_datatype.c @@ -8,12 +8,20 @@ */ #include "pml_ucx_datatype.h" +#include "pml_ucx_request.h" #include "ompi/runtime/mpiruntime.h" #include "ompi/attribute/attribute.h" #include +#include +#ifdef HAVE_UCP_REQUEST_PARAM_T +#define PML_UCX_DATATYPE_SET_VALUE(_datatype, _val) \ + (_datatype)->op_param.send._val; \ + (_datatype)->op_param.bsend._val; \ + (_datatype)->op_param.recv._val; +#endif static void* pml_ucx_generic_datatype_start_pack(void *context, const void *buffer, size_t count) @@ -133,31 +141,78 @@ int mca_pml_ucx_datatype_attr_del_fn(ompi_datatype_t* datatype, int keyval, { ucp_datatype_t ucp_datatype = (ucp_datatype_t)attr_val; +#ifdef HAVE_UCP_REQUEST_PARAM_T + free((void*)datatype->pml_data); +#else PML_UCX_ASSERT((uint64_t)ucp_datatype == datatype->pml_data); - +#endif ucp_dt_destroy(ucp_datatype); datatype->pml_data = PML_UCX_DATATYPE_INVALID; return OMPI_SUCCESS; } -ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype) +__opal_attribute_always_inline__ +static inline int mca_pml_ucx_datatype_is_contig(ompi_datatype_t *datatype) { - ucp_datatype_t ucp_datatype; - ucs_status_t status; ptrdiff_t lb; - size_t size; - int ret; ompi_datatype_type_lb(datatype, &lb); - if ((datatype->super.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && - (datatype->super.flags & OPAL_DATATYPE_FLAG_NO_GAPS) && - (lb == 0)) - { + return (datatype->super.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && + (datatype->super.flags & OPAL_DATATYPE_FLAG_NO_GAPS) && + (lb == 0); +} + +#ifdef HAVE_UCP_REQUEST_PARAM_T +__opal_attribute_always_inline__ static inline +pml_ucx_datatype_t *mca_pml_ucx_init_nbx_datatype(ompi_datatype_t *datatype, + ucp_datatype_t ucp_datatype, + size_t size) +{ + pml_ucx_datatype_t *pml_datatype; + int is_contig_pow2; + + pml_datatype = malloc(sizeof(*pml_datatype)); + if (pml_datatype == NULL) { + PML_UCX_ERROR("Failed to allocate datatype structure"); + ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1); + } + + pml_datatype->datatype = ucp_datatype; + pml_datatype->op_param.send.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK; + pml_datatype->op_param.send.cb.send = mca_pml_ucx_send_nbx_completion; + pml_datatype->op_param.bsend.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK; + pml_datatype->op_param.bsend.cb.send = mca_pml_ucx_bsend_nbx_completion; + pml_datatype->op_param.recv.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK | + UCP_OP_ATTR_FLAG_NO_IMM_CMPL; + pml_datatype->op_param.recv.cb.recv = mca_pml_ucx_recv_nbx_completion; + + is_contig_pow2 = mca_pml_ucx_datatype_is_contig(datatype) && + !(size & (size - 1)); /* is_pow2(size) */ + if (is_contig_pow2) { + pml_datatype->size_shift = (int)(log(size) / log(2.0)); /* log2(size) */ + } else { + pml_datatype->size_shift = 0; + PML_UCX_DATATYPE_SET_VALUE(pml_datatype, op_attr_mask |= UCP_OP_ATTR_FIELD_DATATYPE); + PML_UCX_DATATYPE_SET_VALUE(pml_datatype, datatype = ucp_datatype); + } + + return pml_datatype; +} +#endif + +ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype) +{ + size_t size = 0; /* init to suppress compiler warning */ + ucp_datatype_t ucp_datatype; + ucs_status_t status; + int ret; + + if (mca_pml_ucx_datatype_is_contig(datatype)) { ompi_datatype_type_size(datatype, &size); PML_UCX_ASSERT(size > 0); - datatype->pml_data = ucp_dt_make_contig(size); - return datatype->pml_data; + ucp_datatype = ucp_dt_make_contig(size); + goto out; } status = ucp_dt_create_generic(&pml_ucx_generic_datatype_ops, @@ -167,8 +222,6 @@ ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype) ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1); } - datatype->pml_data = ucp_datatype; - /* Add custom attribute, to clean up UCX resources when OMPI datatype is * released. */ @@ -185,9 +238,18 @@ ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype) ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1); } } - +out: PML_UCX_VERBOSE(7, "created generic UCX datatype 0x%"PRIx64, ucp_datatype) +#ifdef HAVE_UCP_REQUEST_PARAM_T + UCS_STATIC_ASSERT(sizeof(datatype->pml_data) >= sizeof(pml_ucx_datatype_t*)); + datatype->pml_data = (uint64_t)mca_pml_ucx_init_nbx_datatype(datatype, + ucp_datatype, + size); +#else + datatype->pml_data = ucp_datatype; +#endif + return ucp_datatype; } diff --git a/ompi/mca/pml/ucx/pml_ucx_datatype.h b/ompi/mca/pml/ucx/pml_ucx_datatype.h index f5207cecc7..921aab0d39 100644 --- a/ompi/mca/pml/ucx/pml_ucx_datatype.h +++ b/ompi/mca/pml/ucx/pml_ucx_datatype.h @@ -15,13 +15,24 @@ #define PML_UCX_DATATYPE_INVALID 0 -struct pml_ucx_convertor { - opal_free_list_item_t super; - ompi_datatype_t *datatype; - opal_convertor_t opal_conv; - size_t offset; -}; +#ifdef HAVE_UCP_REQUEST_PARAM_T +typedef struct { + ucp_datatype_t datatype; + int size_shift; + struct { + ucp_request_param_t send; + ucp_request_param_t bsend; + ucp_request_param_t recv; + } op_param; +} pml_ucx_datatype_t; +#endif +struct pml_ucx_convertor { + opal_free_list_item_t super; + ompi_datatype_t *datatype; + opal_convertor_t opal_conv; + size_t offset; +}; ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype); @@ -31,15 +42,47 @@ int mca_pml_ucx_datatype_attr_del_fn(ompi_datatype_t* datatype, int keyval, OBJ_CLASS_DECLARATION(mca_pml_ucx_convertor_t); +__opal_attribute_always_inline__ static inline ucp_datatype_t mca_pml_ucx_get_datatype(ompi_datatype_t *datatype) { +#ifdef HAVE_UCP_REQUEST_PARAM_T + pml_ucx_datatype_t *ucp_type = (pml_ucx_datatype_t*)datatype->pml_data; + + if (OPAL_LIKELY(ucp_type != PML_UCX_DATATYPE_INVALID)) { + return ucp_type->datatype; + } +#else ucp_datatype_t ucp_type = datatype->pml_data; if (OPAL_LIKELY(ucp_type != PML_UCX_DATATYPE_INVALID)) { return ucp_type; } +#endif return mca_pml_ucx_init_datatype(datatype); } +#ifdef HAVE_UCP_REQUEST_PARAM_T +__opal_attribute_always_inline__ +static inline pml_ucx_datatype_t* +mca_pml_ucx_get_op_data(ompi_datatype_t *datatype) +{ + pml_ucx_datatype_t *ucp_type = (pml_ucx_datatype_t*)datatype->pml_data; + + if (OPAL_LIKELY(ucp_type != PML_UCX_DATATYPE_INVALID)) { + return ucp_type; + } + + mca_pml_ucx_init_datatype(datatype); + return (pml_ucx_datatype_t*)datatype->pml_data; +} + +__opal_attribute_always_inline__ +static inline size_t mca_pml_ucx_get_data_size(pml_ucx_datatype_t *op_data, + size_t count) +{ + return count << op_data->size_shift; +} +#endif + #endif /* PML_UCX_DATATYPE_H_ */ diff --git a/ompi/mca/pml/ucx/pml_ucx_request.c b/ompi/mca/pml/ucx/pml_ucx_request.c index 536ac95e79..b072afc66e 100644 --- a/ompi/mca/pml/ucx/pml_ucx_request.c +++ b/ompi/mca/pml/ucx/pml_ucx_request.c @@ -34,7 +34,8 @@ static int mca_pml_ucx_request_cancel(ompi_request_t *req, int flag) return OMPI_SUCCESS; } -void mca_pml_ucx_send_completion(void *request, ucs_status_t status) +__opal_attribute_always_inline__ static inline void +mca_pml_ucx_send_completion_internal(void *request, ucs_status_t status) { ompi_request_t *req = request; @@ -46,7 +47,8 @@ void mca_pml_ucx_send_completion(void *request, ucs_status_t status) ompi_request_complete(req, true); } -void mca_pml_ucx_bsend_completion(void *request, ucs_status_t status) +__opal_attribute_always_inline__ static inline void +mca_pml_ucx_bsend_completion_internal(void *request, ucs_status_t status) { ompi_request_t *req = request; @@ -59,8 +61,9 @@ void mca_pml_ucx_bsend_completion(void *request, ucs_status_t status) mca_pml_ucx_request_free(&req); } -void mca_pml_ucx_recv_completion(void *request, ucs_status_t status, - ucp_tag_recv_info_t *info) +__opal_attribute_always_inline__ static inline void +mca_pml_ucx_recv_completion_internal(void *request, ucs_status_t status, + const ucp_tag_recv_info_t *info) { ompi_request_t *req = request; @@ -73,6 +76,41 @@ void mca_pml_ucx_recv_completion(void *request, ucs_status_t status, ompi_request_complete(req, true); } +void mca_pml_ucx_send_completion(void *request, ucs_status_t status) +{ + mca_pml_ucx_send_completion_internal(request, status); +} + +void mca_pml_ucx_bsend_completion(void *request, ucs_status_t status) +{ + mca_pml_ucx_bsend_completion_internal(request, status); +} + +void mca_pml_ucx_recv_completion(void *request, ucs_status_t status, + ucp_tag_recv_info_t *info) +{ + mca_pml_ucx_recv_completion_internal(request, status, info); +} + +void mca_pml_ucx_send_nbx_completion(void *request, ucs_status_t status, + void *user_data) +{ + mca_pml_ucx_send_completion_internal(request, status); +} + +void mca_pml_ucx_bsend_nbx_completion(void *request, ucs_status_t status, + void *user_data) +{ + mca_pml_ucx_bsend_completion_internal(request, status); +} + +void mca_pml_ucx_recv_nbx_completion(void *request, ucs_status_t status, + const ucp_tag_recv_info_t *info, + void *user_data) +{ + mca_pml_ucx_recv_completion_internal(request, status, info); +} + static void mca_pml_ucx_persistent_request_detach(mca_pml_ucx_persistent_request_t *preq, ompi_request_t *tmp_req) { diff --git a/ompi/mca/pml/ucx/pml_ucx_request.h b/ompi/mca/pml/ucx/pml_ucx_request.h index cb53d30bce..c9c017dc30 100644 --- a/ompi/mca/pml/ucx/pml_ucx_request.h +++ b/ompi/mca/pml/ucx/pml_ucx_request.h @@ -126,6 +126,16 @@ void mca_pml_ucx_bsend_completion(void *request, ucs_status_t status); void mca_pml_ucx_precv_completion(void *request, ucs_status_t status, ucp_tag_recv_info_t *info); +void mca_pml_ucx_send_nbx_completion(void *request, ucs_status_t status, + void *user_data); + +void mca_pml_ucx_bsend_nbx_completion(void *request, ucs_status_t status, + void *user_data); + +void mca_pml_ucx_recv_nbx_completion(void *request, ucs_status_t status, + const ucp_tag_recv_info_t *info, + void *user_data); + void mca_pml_ucx_persistent_request_complete(mca_pml_ucx_persistent_request_t *preq, ompi_request_t *tmp_req); @@ -141,8 +151,9 @@ static inline void mca_pml_ucx_request_reset(ompi_request_t *req) req->req_complete = REQUEST_PENDING; } -static void mca_pml_ucx_set_send_status(ompi_status_public_t* mpi_status, - ucs_status_t status) +__opal_attribute_always_inline__ +static inline void mca_pml_ucx_set_send_status(ompi_status_public_t* mpi_status, + ucs_status_t status) { if (OPAL_LIKELY(status == UCS_OK)) { mpi_status->MPI_ERROR = MPI_SUCCESS;