mtl mxm: update the configuration parsing API to MXM 2.1, drop
support for older versions (1.0 and 1.1), and clean up the code. Reviewed by miked. cmr=v1.7.4:reviewer=ompi-gk1.7 This commit was SVN r29797.
Этот коммит содержится в:
родитель
4ee10e7589
Коммит
a913b00f89
@ -89,7 +89,7 @@ static void ompi_mtl_mxm_mem_release_cb(void *buf, size_t length,
|
||||
void *cbdata, bool from_alloc);
|
||||
#endif
|
||||
|
||||
#if MXM_API < MXM_VERSION(2, 0)
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm_ptl_id_t ptlid)
|
||||
{
|
||||
size_t addrlen;
|
||||
@ -143,64 +143,13 @@ ompi_mtl_mxm_create_ep(mxm_h ctx, mxm_ep_h *ep, unsigned ptl_bitmap, int lr,
|
||||
{
|
||||
mxm_error_t err;
|
||||
|
||||
#if MXM_API < MXM_VERSION(1,5)
|
||||
mxm_ep_opts_t ep_opt;
|
||||
struct sockaddr_mxm_local_proc sa_bind_self;
|
||||
struct sockaddr_mxm_ib_local sa_bind_rdma;
|
||||
struct sockaddr_mxm_shm_proc sa_bind_shm;
|
||||
|
||||
mxm_fill_ep_opts(&ep_opt);
|
||||
|
||||
sa_bind_self.sa_family = AF_MXM_LOCAL_PROC;
|
||||
sa_bind_self.context_id = lr;
|
||||
|
||||
sa_bind_rdma.sa_family = AF_MXM_IB_LOCAL;
|
||||
sa_bind_rdma.lid = 0;
|
||||
sa_bind_rdma.pkey = 0;
|
||||
sa_bind_rdma.qp_num = 0;
|
||||
sa_bind_rdma.sl = 0;
|
||||
|
||||
sa_bind_shm.sa_family = AF_MXM_SHM_PROC;
|
||||
sa_bind_shm.jobid = jobid;
|
||||
sa_bind_shm.process_id = lr;
|
||||
sa_bind_shm.context_id = mxlr;
|
||||
sa_bind_shm.num_procs = nlps;
|
||||
|
||||
ep_opt.ptl_bind_addr[MXM_PTL_SELF] =
|
||||
(ptl_bitmap & MXM_BIT(MXM_PTL_SELF)) ?
|
||||
(struct sockaddr*) &sa_bind_self : NULL;
|
||||
ep_opt.ptl_bind_addr[MXM_PTL_RDMA] =
|
||||
(ptl_bitmap & MXM_BIT(MXM_PTL_RDMA)) ?
|
||||
(struct sockaddr*) &sa_bind_rdma : NULL;
|
||||
ep_opt.ptl_bind_addr[MXM_PTL_SHM] =
|
||||
(ptl_bitmap & MXM_BIT(MXM_PTL_SHM)) ?
|
||||
(struct sockaddr*) &sa_bind_shm : NULL;
|
||||
|
||||
MXM_VERBOSE(1, "MXM version is old, consider to upgrade");
|
||||
err = mxm_ep_create(ctx, &ep_opt, ep);
|
||||
#elif MXM_API < MXM_VERSION(2,0)
|
||||
mxm_ep_opts_t *ep_opts;
|
||||
err = mxm_config_read_ep_opts(&ep_opts);
|
||||
if (err != MXM_OK) {
|
||||
MXM_ERROR("Failed to parse MXM configuration");
|
||||
return err;
|
||||
}
|
||||
|
||||
ep_opts->job_id = jobid;
|
||||
ep_opts->local_rank = lr;
|
||||
ep_opts->num_local_procs = nlps;
|
||||
err = mxm_ep_create(ctx, ep_opts, ep);
|
||||
mxm_config_free(ep_opts);
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
ompi_mtl_mxm.mxm_ep_opts->job_id = jobid;
|
||||
ompi_mtl_mxm.mxm_ep_opts->local_rank = lr;
|
||||
ompi_mtl_mxm.mxm_ep_opts->num_local_procs = nlps;
|
||||
err = mxm_ep_create(ctx, ompi_mtl_mxm.mxm_ep_opts, ep);
|
||||
#else
|
||||
mxm_ep_opts_t *ep_opts;
|
||||
err = mxm_config_read_ep_opts(&ep_opts);
|
||||
if (err != MXM_OK) {
|
||||
MXM_ERROR("Failed to parse MXM configuration");
|
||||
return err;
|
||||
}
|
||||
|
||||
err = mxm_ep_create(ctx, ep_opts, ep);
|
||||
mxm_config_free_ep_opts(ep_opts);
|
||||
err = mxm_ep_create(ctx, ompi_mtl_mxm.mxm_ep_opts, ep);
|
||||
#endif
|
||||
return err;
|
||||
}
|
||||
@ -367,10 +316,8 @@ int ompi_mtl_mxm_module_init(void)
|
||||
}
|
||||
|
||||
/* Setup the endpoint options and local addresses to bind to. */
|
||||
#if MXM_API < MXM_VERSION(1,5)
|
||||
ptl_bitmap = ompi_mtl_mxm.mxm_opts.ptl_bitmap;
|
||||
#elif MXM_API < MXM_VERSION(2,0)
|
||||
ptl_bitmap = ompi_mtl_mxm.mxm_opts->ptl_bitmap;
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
ptl_bitmap = ompi_mtl_mxm.mxm_ctx_opts->ptl_bitmap;
|
||||
#else
|
||||
ptl_bitmap = 0;
|
||||
#endif
|
||||
@ -488,6 +435,7 @@ int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
|
||||
conn_reqs[i].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SELF]);
|
||||
conn_reqs[i].ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SHM]);
|
||||
conn_reqs[i].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_RDMA]);
|
||||
|
||||
#else
|
||||
endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t);
|
||||
endpoint->mtl_mxm_module = &ompi_mtl_mxm;
|
||||
@ -504,13 +452,12 @@ int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
/* Connect to remote peers */
|
||||
timeout = (mxm_get_version() < MXM_VERSION(1,5)) ? 1000 : -1;
|
||||
err = mxm_ep_connect(ompi_mtl_mxm.ep, conn_reqs, nprocs, timeout);
|
||||
err = mxm_ep_connect(ompi_mtl_mxm.ep, conn_reqs, nprocs, -1);
|
||||
if (MXM_OK != err) {
|
||||
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
|
||||
for (i = 0; i < nprocs; ++i) {
|
||||
if (MXM_OK != conn_reqs[i].error) {
|
||||
MXM_ERROR("MXM EP connect to %s error: %s\n",
|
||||
MXM_ERROR("MXM EP connect to %s error: %s\n",
|
||||
(NULL == procs[i]->proc_hostname) ?
|
||||
"unknown" : procs[i]->proc_hostname,
|
||||
mxm_error_string(conn_reqs[i].error));
|
||||
@ -527,7 +474,9 @@ int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
|
||||
endpoint->mxm_conn = conn_reqs[i].conn;
|
||||
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
rc = OMPI_SUCCESS;
|
||||
|
||||
bail:
|
||||
@ -601,10 +550,8 @@ static void ompi_mtl_mxm_mem_release_cb(void *buf, size_t length,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if MXM_API >= MXM_VERSION(1,5)
|
||||
OBJ_CLASS_INSTANCE(
|
||||
ompi_mtl_mxm_message_t,
|
||||
ompi_free_list_item_t,
|
||||
NULL,
|
||||
NULL);
|
||||
#endif
|
||||
|
@ -18,7 +18,12 @@
|
||||
#ifndef MXM_VERSION
|
||||
#define MXM_VERSION(major, minor) (((major)<<MXM_MAJOR_BIT)|((minor)<<MXM_MINOR_BIT))
|
||||
#endif
|
||||
#if MXM_API < MXM_VERSION(2, 0)
|
||||
|
||||
#if MXM_API < MXM_VERSION(1,5)
|
||||
#error "Unsupported MXM version, version 1.5 or above required"
|
||||
#endif
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
#include <mxm/api/mxm_addr.h>
|
||||
#endif
|
||||
|
||||
@ -89,7 +94,6 @@ extern int ompi_mtl_mxm_finalize(struct mca_mtl_base_module_t* mtl);
|
||||
|
||||
int ompi_mtl_mxm_module_init(void);
|
||||
|
||||
#if MXM_API >= MXM_VERSION(1,5)
|
||||
struct ompi_mtl_mxm_message_t {
|
||||
ompi_free_list_item_t super;
|
||||
|
||||
@ -102,7 +106,6 @@ struct ompi_mtl_mxm_message_t {
|
||||
};
|
||||
typedef struct ompi_mtl_mxm_message_t ompi_mtl_mxm_message_t;
|
||||
OBJ_CLASS_DECLARATION(ompi_mtl_mxm_message_t);
|
||||
#endif
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
|
@ -72,7 +72,7 @@ static int ompi_mtl_mxm_component_register(void)
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&ompi_mtl_mxm.verbose);
|
||||
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
#if MXM_API > MXM_VERSION(2,0)
|
||||
ompi_mtl_mxm.mxm_np = 0;
|
||||
#else
|
||||
ompi_mtl_mxm.mxm_np = 128;
|
||||
@ -92,6 +92,7 @@ static int ompi_mtl_mxm_component_open(void)
|
||||
{
|
||||
mxm_error_t err;
|
||||
unsigned long cur_ver;
|
||||
int rc;
|
||||
|
||||
mca_mtl_mxm_output = opal_output_open(NULL);
|
||||
opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose);
|
||||
@ -108,11 +109,6 @@ static int ompi_mtl_mxm_component_open(void)
|
||||
}
|
||||
return OMPI_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
#if MXM_API < MXM_VERSION(1,5)
|
||||
mxm_fill_context_opts(&ompi_mtl_mxm.mxm_opts);
|
||||
err = mxm_init(&ompi_mtl_mxm.mxm_opts, &ompi_mtl_mxm.mxm_context);
|
||||
MXM_VERBOSE(1, "mxm component open");
|
||||
#else
|
||||
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
/* Register memory hooks */
|
||||
@ -130,15 +126,21 @@ static int ompi_mtl_mxm_component_open(void)
|
||||
setenv("MXM_SINGLE_THREAD", ompi_mpi_thread_multiple ? "n" : "y" , 0);
|
||||
#endif
|
||||
|
||||
err = mxm_config_read_context_opts(&ompi_mtl_mxm.mxm_opts);
|
||||
if (err != MXM_OK) {
|
||||
#if MXM_API >= MXM_VERSION(2,1)
|
||||
if (MXM_OK != mxm_config_read_opts(&ompi_mtl_mxm.mxm_ctx_opts,
|
||||
&ompi_mtl_mxm.mxm_ep_opts,
|
||||
"MPI", NULL, 0))
|
||||
#else
|
||||
if ((MXM_OK != mxm_config_read_context_opts(&ompi_mtl_mxm.mxm_ctx_opts)) ||
|
||||
(MXM_OK != mxm_config_read_ep_opts(&ompi_mtl_mxm.mxm_ep_opts)))
|
||||
#endif
|
||||
{
|
||||
MXM_ERROR("Failed to parse MXM configuration");
|
||||
return OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
err = mxm_init(ompi_mtl_mxm.mxm_opts, &ompi_mtl_mxm.mxm_context);
|
||||
err = mxm_init(ompi_mtl_mxm.mxm_ctx_opts, &ompi_mtl_mxm.mxm_context);
|
||||
MXM_VERBOSE(1, "mxm component open");
|
||||
#endif
|
||||
|
||||
if (MXM_OK != err) {
|
||||
if (MXM_ERR_NO_DEVICE == err) {
|
||||
@ -150,10 +152,6 @@ static int ompi_mtl_mxm_component_open(void)
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
#if MXM_API >= MXM_VERSION(1,5)
|
||||
{
|
||||
int rc;
|
||||
|
||||
OBJ_CONSTRUCT(&mca_mtl_mxm_component.mxm_messages, ompi_free_list_t);
|
||||
rc = ompi_free_list_init_new(&mca_mtl_mxm_component.mxm_messages,
|
||||
sizeof(ompi_mtl_mxm_message_t),
|
||||
@ -169,8 +167,6 @@ static int ompi_mtl_mxm_component_open(void)
|
||||
mxm_error_string(err));
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -183,9 +179,13 @@ static int ompi_mtl_mxm_component_close(void)
|
||||
if ((cur_ver == MXM_API) && (ompi_mtl_mxm.mxm_context != NULL)) {
|
||||
mxm_cleanup(ompi_mtl_mxm.mxm_context);
|
||||
ompi_mtl_mxm.mxm_context = NULL;
|
||||
|
||||
#if MXM_API >= MXM_VERSION(1,5)
|
||||
OBJ_DESTRUCT(&mca_mtl_mxm_component.mxm_messages);
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
mxm_config_free_ep_opts(ompi_mtl_mxm.mxm_ep_opts);
|
||||
mxm_config_free_context_opts(ompi_mtl_mxm.mxm_ctx_opts);
|
||||
#else
|
||||
mxm_config_free(ompi_mtl_mxm.mxm_ep_opts);
|
||||
mxm_config_free(ompi_mtl_mxm.mxm_ctx_opts);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -54,7 +54,6 @@ int ompi_mtl_mxm_improbe(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_message_t **message,
|
||||
struct ompi_status_public_t *status)
|
||||
{
|
||||
#if MXM_API >= MXM_VERSION(1,5)
|
||||
mxm_error_t err;
|
||||
mxm_recv_req_t req;
|
||||
|
||||
@ -110,7 +109,4 @@ int ompi_mtl_mxm_improbe(struct mca_mtl_base_module_t *mtl,
|
||||
(*message)->count = status->_ucount;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
#else
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
#endif
|
||||
}
|
||||
|
@ -111,12 +111,7 @@ static inline __opal_attribute_always_inline__ int
|
||||
mxm_recv_req->base.flags = 0;
|
||||
#endif
|
||||
|
||||
#if MXM_API < MXM_VERSION(1,5)
|
||||
mxm_recv_req->base.data.buffer.mkey = MXM_MKEY_NONE;
|
||||
#else
|
||||
mxm_recv_req->base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
|
||||
#endif
|
||||
|
||||
mxm_recv_req->base.context = mtl_mxm_request;
|
||||
mxm_recv_req->base.completed_cb = ompi_mtl_mxm_recv_completion_cb;
|
||||
|
||||
@ -160,7 +155,6 @@ int ompi_mtl_mxm_imrecv(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_message_t **message,
|
||||
struct mca_mtl_request_t *mtl_request)
|
||||
{
|
||||
#if MXM_API >= MXM_VERSION(1,5)
|
||||
int ret;
|
||||
mxm_error_t err;
|
||||
mxm_recv_req_t *mxm_recv_req;
|
||||
@ -197,7 +191,4 @@ int ompi_mtl_mxm_imrecv(struct mca_mtl_base_module_t* mtl,
|
||||
(*message) = MPI_MESSAGE_NULL;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
#else
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
#endif
|
||||
}
|
||||
|
@ -119,14 +119,11 @@ int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
#if MXM_API < MXM_VERSION(1,5)
|
||||
mxm_send_req.base.data.buffer.mkey = MXM_MKEY_NONE;
|
||||
#else
|
||||
mxm_send_req.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
|
||||
#endif
|
||||
|
||||
mxm_send_req.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
|
||||
mxm_send_req.op.send.tag = tag;
|
||||
mxm_send_req.op.send.imm_data = ompi_comm_rank(comm);
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
mxm_send_req.base.flags = MXM_REQ_FLAG_BLOCKING;
|
||||
mxm_send_req.opcode = MXM_REQ_OP_SEND;
|
||||
@ -193,11 +190,7 @@ int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
|
||||
mtl_mxm_request->buf = mxm_send_req->base.data.buffer.ptr;
|
||||
mtl_mxm_request->length = mxm_send_req->base.data.buffer.length;
|
||||
|
||||
#if MXM_API < MXM_VERSION(1,5)
|
||||
mxm_send_req->base.data.buffer.mkey = MXM_MKEY_NONE;
|
||||
#else
|
||||
mxm_send_req->base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
|
||||
#endif
|
||||
mxm_send_req->base.context = mtl_mxm_request;
|
||||
mxm_send_req->base.completed_cb = ompi_mtl_mxm_send_completion_cb;
|
||||
|
||||
|
@ -30,11 +30,8 @@ typedef struct mca_mtl_mxm_module_t {
|
||||
int mxm_np;
|
||||
mxm_h mxm_context;
|
||||
mxm_ep_h ep;
|
||||
#if MXM_API < MXM_VERSION(1,5)
|
||||
mxm_context_opts_t mxm_opts;
|
||||
#else
|
||||
mxm_context_opts_t *mxm_opts;
|
||||
#endif
|
||||
mxm_context_opts_t *mxm_ctx_opts;
|
||||
mxm_ep_opts_t *mxm_ep_opts;
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
int using_mem_hooks;
|
||||
#endif
|
||||
@ -51,11 +48,7 @@ extern mca_mtl_mxm_module_t ompi_mtl_mxm;
|
||||
|
||||
typedef struct mca_mtl_mxm_component_t {
|
||||
mca_mtl_base_component_2_0_0_t super; /**< base MTL component */
|
||||
|
||||
#if MXM_API >= MXM_VERSION(1,5)
|
||||
ompi_free_list_t mxm_messages; /* will be used for MPI_Mprobe and MPI_Mrecv calls */
|
||||
#endif
|
||||
|
||||
} mca_mtl_mxm_component_t;
|
||||
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user