1
1

OPAL/UCX: enabling new API provided by UCX

- added detection of new API into configuration
- added tag_send call implemented using new API
- added MPI_Send/MPI_Isend/MPI_Recv/MPI_Irecv implementations

Signed-off-by: Sergey Oblomov <sergeyo@mellanox.com>
(cherry picked from commit 75bda25ddb)
Этот коммит содержится в:
Sergey Oblomov 2020-01-31 14:02:21 +02:00
родитель 849b560c70
Коммит 7c621acf1b
6 изменённых файлов: 286 добавлений и 45 удалений

Просмотреть файл

@ -128,6 +128,14 @@ AC_DEFUN([OMPI_CHECK_UCX],[
[AC_DEFINE([HAVE_UCP_WORKER_ADDRESS_FLAGS], [1],
[have worker address attribute])], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_DECLS([ucp_tag_send_nbx,
ucp_tag_send_sync_nbx,
ucp_tag_recv_nbx],
[], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_TYPES([ucp_request_param_t],
[], [],
[[#include <ucp/api/ucp.h>]])
CPPFLAGS=$old_CPPFLAGS
OPAL_SUMMARY_ADD([[Transports]],[[Open UCX]],[$1],[$ompi_check_ucx_happy])])])

Просмотреть файл

@ -360,10 +360,10 @@ int mca_pml_ucx_cleanup(void)
static ucp_ep_h mca_pml_ucx_add_proc_common(ompi_proc_t *proc)
{
size_t addrlen = 0;
ucp_ep_params_t ep_params;
ucp_address_t *address;
ucs_status_t status;
size_t addrlen;
ucp_ep_h ep;
int ret;
@ -431,6 +431,7 @@ int mca_pml_ucx_add_procs(struct ompi_proc_t **procs, size_t nprocs)
return OMPI_SUCCESS;
}
__opal_attribute_always_inline__
static inline ucp_ep_h mca_pml_ucx_get_ep(ompi_communicator_t *comm, int rank)
{
ucp_ep_h ep;
@ -556,6 +557,11 @@ int mca_pml_ucx_irecv(void *buf, size_t count, ompi_datatype_t *datatype,
int src, int tag, struct ompi_communicator_t* comm,
struct ompi_request_t **request)
{
#if HAVE_DECL_UCP_TAG_RECV_NBX
pml_ucx_datatype_t *op_data = mca_pml_ucx_get_op_data(datatype);
ucp_request_param_t *param = &op_data->op_param.recv;
#endif
ucp_tag_t ucp_tag, ucp_tag_mask;
ompi_request_t *req;
@ -563,10 +569,16 @@ int mca_pml_ucx_irecv(void *buf, size_t count, ompi_datatype_t *datatype,
(void*)request);
PML_UCX_MAKE_RECV_TAG(ucp_tag, ucp_tag_mask, tag, src, comm);
#if HAVE_DECL_UCP_TAG_RECV_NBX
req = (ompi_request_t*)ucp_tag_recv_nbx(ompi_pml_ucx.ucp_worker, buf,
mca_pml_ucx_get_data_size(op_data, count),
ucp_tag, ucp_tag_mask, param);
#else
req = (ompi_request_t*)ucp_tag_recv_nb(ompi_pml_ucx.ucp_worker, buf, count,
mca_pml_ucx_get_datatype(datatype),
ucp_tag, ucp_tag_mask,
mca_pml_ucx_recv_completion);
#endif
if (UCS_PTR_IS_ERR(req)) {
PML_UCX_ERROR("ucx recv failed: %s", ucs_status_string(UCS_PTR_STATUS(req)));
return OMPI_ERROR;
@ -582,20 +594,34 @@ int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src
int tag, struct ompi_communicator_t* comm,
ompi_status_public_t* mpi_status)
{
/* coverity[bad_alloc_arithmetic] */
void *req = PML_UCX_REQ_ALLOCA();
#if HAVE_DECL_UCP_TAG_RECV_NBX
pml_ucx_datatype_t *op_data = mca_pml_ucx_get_op_data(datatype);
ucp_request_param_t *recv_param = &op_data->op_param.recv;
ucp_request_param_t param;
param.op_attr_mask = UCP_OP_ATTR_FIELD_REQUEST |
(recv_param->op_attr_mask & UCP_OP_ATTR_FIELD_DATATYPE);
param.datatype = recv_param->datatype;
param.request = req;
#endif
ucp_tag_t ucp_tag, ucp_tag_mask;
ucp_tag_recv_info_t info;
ucs_status_t status;
void *req;
PML_UCX_TRACE_RECV("%s", buf, count, datatype, src, tag, comm, "recv");
/* coverity[bad_alloc_arithmetic] */
PML_UCX_MAKE_RECV_TAG(ucp_tag, ucp_tag_mask, tag, src, comm);
req = PML_UCX_REQ_ALLOCA();
status = ucp_tag_recv_nbr(ompi_pml_ucx.ucp_worker, buf, count,
mca_pml_ucx_get_datatype(datatype),
ucp_tag, ucp_tag_mask, req);
#if HAVE_DECL_UCP_TAG_RECV_NBX
ucp_tag_recv_nbx(ompi_pml_ucx.ucp_worker, buf,
mca_pml_ucx_get_data_size(op_data, count),
ucp_tag, ucp_tag_mask, &param);
#else
ucp_tag_recv_nbr(ompi_pml_ucx.ucp_worker, buf, count,
mca_pml_ucx_get_datatype(datatype),
ucp_tag, ucp_tag_mask, req);
#endif
MCA_COMMON_UCX_PROGRESS_LOOP(ompi_pml_ucx.ucp_worker) {
status = ucp_request_test(req, &info);
if (status != UCS_INPROGRESS) {
@ -605,6 +631,7 @@ int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src
}
}
__opal_attribute_always_inline__
static inline const char *mca_pml_ucx_send_mode_name(mca_pml_base_send_mode_t mode)
{
switch (mode) {
@ -726,6 +753,7 @@ mca_pml_ucx_bsend(ucp_ep_h ep, const void *buf, size_t count,
return NULL;
}
__opal_attribute_always_inline__
static inline ucs_status_ptr_t mca_pml_ucx_common_send(ucp_ep_h ep, const void *buf,
size_t count,
ompi_datatype_t *datatype,
@ -743,6 +771,32 @@ static inline ucs_status_ptr_t mca_pml_ucx_common_send(ucp_ep_h ep, const void *
}
}
#if HAVE_DECL_UCP_TAG_SEND_NBX
__opal_attribute_always_inline__
static inline ucs_status_ptr_t
mca_pml_ucx_common_send_nbx(ucp_ep_h ep, const void *buf,
size_t count,
ompi_datatype_t *datatype,
ucp_tag_t tag,
mca_pml_base_send_mode_t mode,
ucp_request_param_t *param)
{
pml_ucx_datatype_t *op_data = mca_pml_ucx_get_op_data(datatype);
if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_BUFFERED == mode)) {
return mca_pml_ucx_bsend(ep, buf, count, datatype, tag);
} else if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_SYNCHRONOUS == mode)) {
return ucp_tag_send_sync_nb(ep, buf, count,
mca_pml_ucx_get_datatype(datatype), tag,
(ucp_send_callback_t)param->cb.send);
} else {
return ucp_tag_send_nbx(ep, buf,
mca_pml_ucx_get_data_size(op_data, count),
tag, param);
}
}
#endif
int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype,
int dst, int tag, mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
@ -761,10 +815,16 @@ int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype,
return OMPI_ERROR;
}
#if HAVE_DECL_UCP_TAG_SEND_NBX
req = (ompi_request_t*)mca_pml_ucx_common_send_nbx(ep, buf, count, datatype,
PML_UCX_MAKE_SEND_TAG(tag, comm), mode,
&mca_pml_ucx_get_op_data(datatype)->op_param.send);
#else
req = (ompi_request_t*)mca_pml_ucx_common_send(ep, buf, count, datatype,
mca_pml_ucx_get_datatype(datatype),
PML_UCX_MAKE_SEND_TAG(tag, comm), mode,
mca_pml_ucx_send_completion);
#endif
if (req == NULL) {
PML_UCX_VERBOSE(8, "returning completed request");
@ -806,20 +866,40 @@ mca_pml_ucx_send_nb(ucp_ep_h ep, const void *buf, size_t count,
#if HAVE_DECL_UCP_TAG_SEND_NBR
static inline __opal_attribute_always_inline__ int
mca_pml_ucx_send_nbr(ucp_ep_h ep, const void *buf, size_t count,
ucp_datatype_t ucx_datatype, ucp_tag_t tag)
ompi_datatype_t *datatype, ucp_tag_t tag)
{
ucs_status_ptr_t req;
ucs_status_t status;
/* coverity[bad_alloc_arithmetic] */
req = PML_UCX_REQ_ALLOCA();
status = ucp_tag_send_nbr(ep, buf, count, ucx_datatype, tag, req);
ucs_status_ptr_t req = PML_UCX_REQ_ALLOCA();
#if HAVE_DECL_UCP_TAG_SEND_NBX
pml_ucx_datatype_t *op_data = mca_pml_ucx_get_op_data(datatype);
ucp_request_param_t param = {
.op_attr_mask = UCP_OP_ATTR_FIELD_REQUEST |
(op_data->op_param.send.op_attr_mask & UCP_OP_ATTR_FIELD_DATATYPE) |
UCP_OP_ATTR_FLAG_FAST_CMPL,
.datatype = op_data->op_param.send.datatype,
.request = req
};
req = ucp_tag_send_nbx(ep, buf,
mca_pml_ucx_get_data_size(op_data, count),
tag, &param);
if (OPAL_LIKELY(req == UCS_OK)) {
return OMPI_SUCCESS;
} else if (UCS_PTR_IS_ERR(req)) {
PML_UCX_ERROR("%s failed: %d, %s", __func__, UCS_PTR_STATUS(req),
ucs_status_string(UCS_PTR_STATUS(req)));
return OPAL_ERROR;
}
#else
ucs_status_t status;
status = ucp_tag_send_nbr(ep, buf, count,
mca_pml_ucx_get_datatype(datatype), tag, req);
if (OPAL_LIKELY(status == UCS_OK)) {
return OMPI_SUCCESS;
}
#endif
MCA_COMMON_UCX_WAIT_LOOP(req, ompi_pml_ucx.ucp_worker, "ucx send", (void)0);
MCA_COMMON_UCX_WAIT_LOOP(req, ompi_pml_ucx.ucp_worker, "ucx send nbr", (void)0);
}
#endif
@ -840,8 +920,7 @@ int mca_pml_ucx_send(const void *buf, size_t count, ompi_datatype_t *datatype, i
#if HAVE_DECL_UCP_TAG_SEND_NBR
if (OPAL_LIKELY((MCA_PML_BASE_SEND_BUFFERED != mode) &&
(MCA_PML_BASE_SEND_SYNCHRONOUS != mode))) {
return mca_pml_ucx_send_nbr(ep, buf, count,
mca_pml_ucx_get_datatype(datatype),
return mca_pml_ucx_send_nbr(ep, buf, count, datatype,
PML_UCX_MAKE_SEND_TAG(tag, comm));
}
#endif

Просмотреть файл

@ -8,12 +8,20 @@
*/
#include "pml_ucx_datatype.h"
#include "pml_ucx_request.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/attribute/attribute.h"
#include <inttypes.h>
#include <math.h>
#ifdef HAVE_UCP_REQUEST_PARAM_T
#define PML_UCX_DATATYPE_SET_VALUE(_datatype, _val) \
(_datatype)->op_param.send._val; \
(_datatype)->op_param.bsend._val; \
(_datatype)->op_param.recv._val;
#endif
static void* pml_ucx_generic_datatype_start_pack(void *context, const void *buffer,
size_t count)
@ -133,31 +141,78 @@ int mca_pml_ucx_datatype_attr_del_fn(ompi_datatype_t* datatype, int keyval,
{
ucp_datatype_t ucp_datatype = (ucp_datatype_t)attr_val;
#ifdef HAVE_UCP_REQUEST_PARAM_T
free((void*)datatype->pml_data);
#else
PML_UCX_ASSERT((uint64_t)ucp_datatype == datatype->pml_data);
#endif
ucp_dt_destroy(ucp_datatype);
datatype->pml_data = PML_UCX_DATATYPE_INVALID;
return OMPI_SUCCESS;
}
ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype)
__opal_attribute_always_inline__
static inline int mca_pml_ucx_datatype_is_contig(ompi_datatype_t *datatype)
{
ucp_datatype_t ucp_datatype;
ucs_status_t status;
ptrdiff_t lb;
size_t size;
int ret;
ompi_datatype_type_lb(datatype, &lb);
if ((datatype->super.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) &&
(datatype->super.flags & OPAL_DATATYPE_FLAG_NO_GAPS) &&
(lb == 0))
{
return (datatype->super.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) &&
(datatype->super.flags & OPAL_DATATYPE_FLAG_NO_GAPS) &&
(lb == 0);
}
#ifdef HAVE_UCP_REQUEST_PARAM_T
__opal_attribute_always_inline__ static inline
pml_ucx_datatype_t *mca_pml_ucx_init_nbx_datatype(ompi_datatype_t *datatype,
ucp_datatype_t ucp_datatype,
size_t size)
{
pml_ucx_datatype_t *pml_datatype;
int is_contig_pow2;
pml_datatype = malloc(sizeof(*pml_datatype));
if (pml_datatype == NULL) {
PML_UCX_ERROR("Failed to allocate datatype structure");
ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1);
}
pml_datatype->datatype = ucp_datatype;
pml_datatype->op_param.send.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK;
pml_datatype->op_param.send.cb.send = mca_pml_ucx_send_nbx_completion;
pml_datatype->op_param.bsend.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK;
pml_datatype->op_param.bsend.cb.send = mca_pml_ucx_bsend_nbx_completion;
pml_datatype->op_param.recv.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK |
UCP_OP_ATTR_FLAG_NO_IMM_CMPL;
pml_datatype->op_param.recv.cb.recv = mca_pml_ucx_recv_nbx_completion;
is_contig_pow2 = mca_pml_ucx_datatype_is_contig(datatype) &&
!(size & (size - 1)); /* is_pow2(size) */
if (is_contig_pow2) {
pml_datatype->size_shift = (int)(log(size) / log(2.0)); /* log2(size) */
} else {
pml_datatype->size_shift = 0;
PML_UCX_DATATYPE_SET_VALUE(pml_datatype, op_attr_mask |= UCP_OP_ATTR_FIELD_DATATYPE);
PML_UCX_DATATYPE_SET_VALUE(pml_datatype, datatype = ucp_datatype);
}
return pml_datatype;
}
#endif
ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype)
{
size_t size = 0; /* init to suppress compiler warning */
ucp_datatype_t ucp_datatype;
ucs_status_t status;
int ret;
if (mca_pml_ucx_datatype_is_contig(datatype)) {
ompi_datatype_type_size(datatype, &size);
PML_UCX_ASSERT(size > 0);
datatype->pml_data = ucp_dt_make_contig(size);
return datatype->pml_data;
ucp_datatype = ucp_dt_make_contig(size);
goto out;
}
status = ucp_dt_create_generic(&pml_ucx_generic_datatype_ops,
@ -167,8 +222,6 @@ ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype)
ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1);
}
datatype->pml_data = ucp_datatype;
/* Add custom attribute, to clean up UCX resources when OMPI datatype is
* released.
*/
@ -185,9 +238,18 @@ ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype)
ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1);
}
}
out:
PML_UCX_VERBOSE(7, "created generic UCX datatype 0x%"PRIx64, ucp_datatype)
#ifdef HAVE_UCP_REQUEST_PARAM_T
UCS_STATIC_ASSERT(sizeof(datatype->pml_data) >= sizeof(pml_ucx_datatype_t*));
datatype->pml_data = (uint64_t)mca_pml_ucx_init_nbx_datatype(datatype,
ucp_datatype,
size);
#else
datatype->pml_data = ucp_datatype;
#endif
return ucp_datatype;
}

Просмотреть файл

@ -15,13 +15,24 @@
#define PML_UCX_DATATYPE_INVALID 0
struct pml_ucx_convertor {
opal_free_list_item_t super;
ompi_datatype_t *datatype;
opal_convertor_t opal_conv;
size_t offset;
};
#ifdef HAVE_UCP_REQUEST_PARAM_T
typedef struct {
ucp_datatype_t datatype;
int size_shift;
struct {
ucp_request_param_t send;
ucp_request_param_t bsend;
ucp_request_param_t recv;
} op_param;
} pml_ucx_datatype_t;
#endif
struct pml_ucx_convertor {
opal_free_list_item_t super;
ompi_datatype_t *datatype;
opal_convertor_t opal_conv;
size_t offset;
};
ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype);
@ -31,15 +42,47 @@ int mca_pml_ucx_datatype_attr_del_fn(ompi_datatype_t* datatype, int keyval,
OBJ_CLASS_DECLARATION(mca_pml_ucx_convertor_t);
__opal_attribute_always_inline__
static inline ucp_datatype_t mca_pml_ucx_get_datatype(ompi_datatype_t *datatype)
{
#ifdef HAVE_UCP_REQUEST_PARAM_T
pml_ucx_datatype_t *ucp_type = (pml_ucx_datatype_t*)datatype->pml_data;
if (OPAL_LIKELY(ucp_type != PML_UCX_DATATYPE_INVALID)) {
return ucp_type->datatype;
}
#else
ucp_datatype_t ucp_type = datatype->pml_data;
if (OPAL_LIKELY(ucp_type != PML_UCX_DATATYPE_INVALID)) {
return ucp_type;
}
#endif
return mca_pml_ucx_init_datatype(datatype);
}
#ifdef HAVE_UCP_REQUEST_PARAM_T
__opal_attribute_always_inline__
static inline pml_ucx_datatype_t*
mca_pml_ucx_get_op_data(ompi_datatype_t *datatype)
{
pml_ucx_datatype_t *ucp_type = (pml_ucx_datatype_t*)datatype->pml_data;
if (OPAL_LIKELY(ucp_type != PML_UCX_DATATYPE_INVALID)) {
return ucp_type;
}
mca_pml_ucx_init_datatype(datatype);
return (pml_ucx_datatype_t*)datatype->pml_data;
}
__opal_attribute_always_inline__
static inline size_t mca_pml_ucx_get_data_size(pml_ucx_datatype_t *op_data,
size_t count)
{
return count << op_data->size_shift;
}
#endif
#endif /* PML_UCX_DATATYPE_H_ */

Просмотреть файл

@ -34,7 +34,8 @@ static int mca_pml_ucx_request_cancel(ompi_request_t *req, int flag)
return OMPI_SUCCESS;
}
void mca_pml_ucx_send_completion(void *request, ucs_status_t status)
__opal_attribute_always_inline__ static inline void
mca_pml_ucx_send_completion_internal(void *request, ucs_status_t status)
{
ompi_request_t *req = request;
@ -46,7 +47,8 @@ void mca_pml_ucx_send_completion(void *request, ucs_status_t status)
ompi_request_complete(req, true);
}
void mca_pml_ucx_bsend_completion(void *request, ucs_status_t status)
__opal_attribute_always_inline__ static inline void
mca_pml_ucx_bsend_completion_internal(void *request, ucs_status_t status)
{
ompi_request_t *req = request;
@ -59,8 +61,9 @@ void mca_pml_ucx_bsend_completion(void *request, ucs_status_t status)
mca_pml_ucx_request_free(&req);
}
void mca_pml_ucx_recv_completion(void *request, ucs_status_t status,
ucp_tag_recv_info_t *info)
__opal_attribute_always_inline__ static inline void
mca_pml_ucx_recv_completion_internal(void *request, ucs_status_t status,
const ucp_tag_recv_info_t *info)
{
ompi_request_t *req = request;
@ -73,6 +76,41 @@ void mca_pml_ucx_recv_completion(void *request, ucs_status_t status,
ompi_request_complete(req, true);
}
void mca_pml_ucx_send_completion(void *request, ucs_status_t status)
{
mca_pml_ucx_send_completion_internal(request, status);
}
void mca_pml_ucx_bsend_completion(void *request, ucs_status_t status)
{
mca_pml_ucx_bsend_completion_internal(request, status);
}
void mca_pml_ucx_recv_completion(void *request, ucs_status_t status,
ucp_tag_recv_info_t *info)
{
mca_pml_ucx_recv_completion_internal(request, status, info);
}
void mca_pml_ucx_send_nbx_completion(void *request, ucs_status_t status,
void *user_data)
{
mca_pml_ucx_send_completion_internal(request, status);
}
void mca_pml_ucx_bsend_nbx_completion(void *request, ucs_status_t status,
void *user_data)
{
mca_pml_ucx_bsend_completion_internal(request, status);
}
void mca_pml_ucx_recv_nbx_completion(void *request, ucs_status_t status,
const ucp_tag_recv_info_t *info,
void *user_data)
{
mca_pml_ucx_recv_completion_internal(request, status, info);
}
static void mca_pml_ucx_persistent_request_detach(mca_pml_ucx_persistent_request_t *preq,
ompi_request_t *tmp_req)
{

Просмотреть файл

@ -126,6 +126,16 @@ void mca_pml_ucx_bsend_completion(void *request, ucs_status_t status);
void mca_pml_ucx_precv_completion(void *request, ucs_status_t status,
ucp_tag_recv_info_t *info);
void mca_pml_ucx_send_nbx_completion(void *request, ucs_status_t status,
void *user_data);
void mca_pml_ucx_bsend_nbx_completion(void *request, ucs_status_t status,
void *user_data);
void mca_pml_ucx_recv_nbx_completion(void *request, ucs_status_t status,
const ucp_tag_recv_info_t *info,
void *user_data);
void mca_pml_ucx_persistent_request_complete(mca_pml_ucx_persistent_request_t *preq,
ompi_request_t *tmp_req);
@ -141,8 +151,9 @@ static inline void mca_pml_ucx_request_reset(ompi_request_t *req)
req->req_complete = REQUEST_PENDING;
}
static void mca_pml_ucx_set_send_status(ompi_status_public_t* mpi_status,
ucs_status_t status)
__opal_attribute_always_inline__
static inline void mca_pml_ucx_set_send_status(ompi_status_public_t* mpi_status,
ucs_status_t status)
{
if (OPAL_LIKELY(status == UCS_OK)) {
mpi_status->MPI_ERROR = MPI_SUCCESS;