1
1

pml_ucx: add ompi datatype attribute to release ucp_datatype

Signed-off-by: Yossi Itigin <yosefe@mellanox.com>
Этот коммит содержится в:
Yossi Itigin 2018-10-09 16:38:49 +03:00
родитель b0e6d1fefc
Коммит 4763822a64
4 изменённых файлов: 97 добавлений и 16 удалений

Просмотреть файл

@ -16,6 +16,7 @@
#include "opal/runtime/opal.h"
#include "opal/mca/pmix/pmix.h"
#include "ompi/attribute/attribute.h"
#include "ompi/message/message.h"
#include "ompi/mca/pml/base/pml_base_bsend.h"
#include "opal/mca/common/ucx/common_ucx.h"
@ -190,9 +191,9 @@ int mca_pml_ucx_close(void)
int mca_pml_ucx_init(void)
{
ucp_worker_params_t params;
ucs_status_t status;
ucp_worker_attr_t attr;
int rc;
ucs_status_t status;
int i, rc;
PML_UCX_VERBOSE(1, "mca_pml_ucx_init");
@ -209,30 +210,34 @@ int mca_pml_ucx_init(void)
&ompi_pml_ucx.ucp_worker);
if (UCS_OK != status) {
PML_UCX_ERROR("Failed to create UCP worker");
return OMPI_ERROR;
rc = OMPI_ERROR;
goto err;
}
attr.field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE;
status = ucp_worker_query(ompi_pml_ucx.ucp_worker, &attr);
if (UCS_OK != status) {
ucp_worker_destroy(ompi_pml_ucx.ucp_worker);
ompi_pml_ucx.ucp_worker = NULL;
PML_UCX_ERROR("Failed to query UCP worker thread level");
return OMPI_ERROR;
rc = OMPI_ERROR;
goto err_destroy_worker;
}
if (ompi_mpi_thread_multiple && attr.thread_mode != UCS_THREAD_MODE_MULTI) {
if (ompi_mpi_thread_multiple && (attr.thread_mode != UCS_THREAD_MODE_MULTI)) {
/* UCX does not support multithreading, disqualify current PML for now */
/* TODO: we should let OMPI to fallback to THREAD_SINGLE mode */
ucp_worker_destroy(ompi_pml_ucx.ucp_worker);
ompi_pml_ucx.ucp_worker = NULL;
PML_UCX_ERROR("UCP worker does not support MPI_THREAD_MULTIPLE");
return OMPI_ERROR;
rc = OMPI_ERR_NOT_SUPPORTED;
goto err_destroy_worker;
}
rc = mca_pml_ucx_send_worker_address();
if (rc < 0) {
return rc;
goto err_destroy_worker;
}
ompi_pml_ucx.datatype_attr_keyval = MPI_KEYVAL_INVALID;
for (i = 0; i < OMPI_DATATYPE_MAX_PREDEFINED; ++i) {
ompi_pml_ucx.predefined_types[i] = PML_UCX_DATATYPE_INVALID;
}
/* Initialize the free lists */
@ -249,14 +254,33 @@ int mca_pml_ucx_init(void)
(void *)ompi_pml_ucx.ucp_context,
(void *)ompi_pml_ucx.ucp_worker);
return OMPI_SUCCESS;
err_destroy_worker:
ucp_worker_destroy(ompi_pml_ucx.ucp_worker);
ompi_pml_ucx.ucp_worker = NULL;
err:
return OMPI_ERROR;
}
int mca_pml_ucx_cleanup(void)
{
int i;
PML_UCX_VERBOSE(1, "mca_pml_ucx_cleanup");
opal_progress_unregister(mca_pml_ucx_progress);
if (ompi_pml_ucx.datatype_attr_keyval != MPI_KEYVAL_INVALID) {
ompi_attr_free_keyval(TYPE_ATTR, &ompi_pml_ucx.datatype_attr_keyval, false);
}
for (i = 0; i < OMPI_DATATYPE_MAX_PREDEFINED; ++i) {
if (ompi_pml_ucx.predefined_types[i] != PML_UCX_DATATYPE_INVALID) {
ucp_dt_destroy(ompi_pml_ucx.predefined_types[i]);
ompi_pml_ucx.predefined_types[i] = PML_UCX_DATATYPE_INVALID;
}
}
ompi_pml_ucx.completed_send_req.req_state = OMPI_REQUEST_INVALID;
OMPI_REQUEST_FINI(&ompi_pml_ucx.completed_send_req);
OBJ_DESTRUCT(&ompi_pml_ucx.completed_send_req);
@ -398,6 +422,22 @@ int mca_pml_ucx_del_procs(struct ompi_proc_t **procs, size_t nprocs)
int mca_pml_ucx_enable(bool enable)
{
ompi_attribute_fn_ptr_union_t copy_fn;
ompi_attribute_fn_ptr_union_t del_fn;
int ret;
/* Create a key for adding custom attributes to datatypes */
copy_fn.attr_datatype_copy_fn =
(MPI_Type_internal_copy_attr_function*)MPI_TYPE_NULL_COPY_FN;
del_fn.attr_datatype_delete_fn = mca_pml_ucx_datatype_attr_del_fn;
ret = ompi_attr_create_keyval(TYPE_ATTR, copy_fn, del_fn,
&ompi_pml_ucx.datatype_attr_keyval, NULL, 0,
NULL);
if (ret != OMPI_SUCCESS) {
PML_UCX_ERROR("Failed to create keyval for UCX datatypes: %d", ret);
return ret;
}
PML_UCX_FREELIST_INIT(&ompi_pml_ucx.persistent_reqs,
mca_pml_ucx_persistent_request_t,
128, -1, 128);

Просмотреть файл

@ -15,6 +15,7 @@
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/datatype/ompi_datatype_internal.h"
#include "ompi/communicator/communicator.h"
#include "ompi/request/request.h"
#include "opal/mca/common/ucx/common_ucx.h"
@ -42,6 +43,10 @@ struct mca_pml_ucx_module {
ucp_context_h ucp_context;
ucp_worker_h ucp_worker;
/* Datatypes */
int datatype_attr_keyval;
ucp_datatype_t predefined_types[OMPI_DATATYPE_MPI_MAX_PREDEFINED];
/* Requests */
mca_pml_ucx_freelist_t persistent_reqs;
ompi_request_t completed_send_req;

Просмотреть файл

@ -10,6 +10,7 @@
#include "pml_ucx_datatype.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/attribute/attribute.h"
#include <inttypes.h>
@ -127,12 +128,25 @@ static ucp_generic_dt_ops_t pml_ucx_generic_datatype_ops = {
.finish = pml_ucx_generic_datatype_finish
};
/**
 * Attribute delete callback for the UCX datatype attribute attached to an
 * OMPI datatype.
 *
 * Destroys the UCX datatype whose handle is carried in the attribute value
 * and resets the handle cached in the OMPI datatype to
 * PML_UCX_DATATYPE_INVALID.
 *
 * @param datatype  OMPI datatype whose attribute is being deleted.
 * @param keyval    Attribute key (unused).
 * @param attr_val  Attribute value: the ucp_datatype_t handle stored as a
 *                  pointer-sized value.
 * @param extra     Unused.
 *
 * @return OMPI_SUCCESS.
 */
int mca_pml_ucx_datatype_attr_del_fn(ompi_datatype_t* datatype, int keyval,
                                     void *attr_val, void *extra)
{
    /* The handle was stored as a pointer; convert back through uintptr_t so
     * the pointer-to-integer conversion is well-defined on every pointer
     * width. */
    ucp_datatype_t ucp_datatype = (ucp_datatype_t)(uintptr_t)attr_val;

    /* pml_data is an integer handle, so compare handles directly rather than
     * comparing a pointer against an integer. */
    PML_UCX_ASSERT(ucp_datatype == datatype->pml_data);

    ucp_dt_destroy(ucp_datatype);
    datatype->pml_data = PML_UCX_DATATYPE_INVALID;
    return OMPI_SUCCESS;
}
ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype)
{
ucp_datatype_t ucp_datatype;
ucs_status_t status;
ptrdiff_t lb;
size_t size;
int ret;
ompi_datatype_type_lb(datatype, &lb);
@ -147,16 +161,33 @@ ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype)
}
status = ucp_dt_create_generic(&pml_ucx_generic_datatype_ops,
datatype, &ucp_datatype);
datatype, &ucp_datatype);
if (status != UCS_OK) {
PML_UCX_ERROR("Failed to create UCX datatype for %s", datatype->name);
ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1);
}
PML_UCX_VERBOSE(7, "created generic UCX datatype 0x%"PRIx64, ucp_datatype)
// TODO put this on a list to be destroyed later
datatype->pml_data = ucp_datatype;
/* Add custom attribute, to clean up UCX resources when OMPI datatype is
* released.
*/
if (ompi_datatype_is_predefined(datatype)) {
PML_UCX_ASSERT(datatype->id < OMPI_DATATYPE_MAX_PREDEFINED);
ompi_pml_ucx.predefined_types[datatype->id] = ucp_datatype;
} else {
ret = ompi_attr_set_c(TYPE_ATTR, datatype, &datatype->d_keyhash,
ompi_pml_ucx.datatype_attr_keyval,
(void*)ucp_datatype, false);
if (ret != OMPI_SUCCESS) {
PML_UCX_ERROR("Failed to add UCX datatype attribute for %s: %d",
datatype->name, ret);
ompi_mpi_abort(&ompi_mpi_comm_world.comm, 1);
}
}
PML_UCX_VERBOSE(7, "created generic UCX datatype 0x%"PRIx64, ucp_datatype)
return ucp_datatype;
}

Просмотреть файл

@ -13,6 +13,8 @@
#include "pml_ucx.h"
#define PML_UCX_DATATYPE_INVALID 0
struct pml_ucx_convertor {
opal_free_list_item_t super;
ompi_datatype_t *datatype;
@ -23,6 +25,9 @@ struct pml_ucx_convertor {
ucp_datatype_t mca_pml_ucx_init_datatype(ompi_datatype_t *datatype);
int mca_pml_ucx_datatype_attr_del_fn(ompi_datatype_t* datatype, int keyval,
void *attr_val, void *extra);
OBJ_CLASS_DECLARATION(mca_pml_ucx_convertor_t);
@ -30,7 +35,7 @@ static inline ucp_datatype_t mca_pml_ucx_get_datatype(ompi_datatype_t *datatype)
{
ucp_datatype_t ucp_type = datatype->pml_data;
if (OPAL_LIKELY(ucp_type != 0)) {
if (OPAL_LIKELY(ucp_type != PML_UCX_DATATYPE_INVALID)) {
return ucp_type;
}