1
1

mtl mxm: update the configuration parsing API to MXM 2.1, drop support for older versions (1.0 and 1.1), and clean up the code.

Reviewed by miked.

cmr=v1.7.4:reviewer=ompi-gk1.7

This commit was SVN r29797.
Этот коммит содержится в:
Yossi Etigin 2013-12-04 09:11:55 +00:00
родитель 4ee10e7589
Коммит a913b00f89
7 изменённых файлов: 42 добавлений и 119 удалений

Просмотреть файл

@ -89,7 +89,7 @@ static void ompi_mtl_mxm_mem_release_cb(void *buf, size_t length,
void *cbdata, bool from_alloc); void *cbdata, bool from_alloc);
#endif #endif
#if MXM_API < MXM_VERSION(2, 0) #if MXM_API < MXM_VERSION(2,0)
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm_ptl_id_t ptlid) static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm_ptl_id_t ptlid)
{ {
size_t addrlen; size_t addrlen;
@ -143,64 +143,13 @@ ompi_mtl_mxm_create_ep(mxm_h ctx, mxm_ep_h *ep, unsigned ptl_bitmap, int lr,
{ {
mxm_error_t err; mxm_error_t err;
#if MXM_API < MXM_VERSION(1,5) #if MXM_API < MXM_VERSION(2,0)
mxm_ep_opts_t ep_opt; ompi_mtl_mxm.mxm_ep_opts->job_id = jobid;
struct sockaddr_mxm_local_proc sa_bind_self; ompi_mtl_mxm.mxm_ep_opts->local_rank = lr;
struct sockaddr_mxm_ib_local sa_bind_rdma; ompi_mtl_mxm.mxm_ep_opts->num_local_procs = nlps;
struct sockaddr_mxm_shm_proc sa_bind_shm; err = mxm_ep_create(ctx, ompi_mtl_mxm.mxm_ep_opts, ep);
mxm_fill_ep_opts(&ep_opt);
sa_bind_self.sa_family = AF_MXM_LOCAL_PROC;
sa_bind_self.context_id = lr;
sa_bind_rdma.sa_family = AF_MXM_IB_LOCAL;
sa_bind_rdma.lid = 0;
sa_bind_rdma.pkey = 0;
sa_bind_rdma.qp_num = 0;
sa_bind_rdma.sl = 0;
sa_bind_shm.sa_family = AF_MXM_SHM_PROC;
sa_bind_shm.jobid = jobid;
sa_bind_shm.process_id = lr;
sa_bind_shm.context_id = mxlr;
sa_bind_shm.num_procs = nlps;
ep_opt.ptl_bind_addr[MXM_PTL_SELF] =
(ptl_bitmap & MXM_BIT(MXM_PTL_SELF)) ?
(struct sockaddr*) &sa_bind_self : NULL;
ep_opt.ptl_bind_addr[MXM_PTL_RDMA] =
(ptl_bitmap & MXM_BIT(MXM_PTL_RDMA)) ?
(struct sockaddr*) &sa_bind_rdma : NULL;
ep_opt.ptl_bind_addr[MXM_PTL_SHM] =
(ptl_bitmap & MXM_BIT(MXM_PTL_SHM)) ?
(struct sockaddr*) &sa_bind_shm : NULL;
MXM_VERBOSE(1, "MXM version is old, consider to upgrade");
err = mxm_ep_create(ctx, &ep_opt, ep);
#elif MXM_API < MXM_VERSION(2,0)
mxm_ep_opts_t *ep_opts;
err = mxm_config_read_ep_opts(&ep_opts);
if (err != MXM_OK) {
MXM_ERROR("Failed to parse MXM configuration");
return err;
}
ep_opts->job_id = jobid;
ep_opts->local_rank = lr;
ep_opts->num_local_procs = nlps;
err = mxm_ep_create(ctx, ep_opts, ep);
mxm_config_free(ep_opts);
#else #else
mxm_ep_opts_t *ep_opts; err = mxm_ep_create(ctx, ompi_mtl_mxm.mxm_ep_opts, ep);
err = mxm_config_read_ep_opts(&ep_opts);
if (err != MXM_OK) {
MXM_ERROR("Failed to parse MXM configuration");
return err;
}
err = mxm_ep_create(ctx, ep_opts, ep);
mxm_config_free_ep_opts(ep_opts);
#endif #endif
return err; return err;
} }
@ -367,10 +316,8 @@ int ompi_mtl_mxm_module_init(void)
} }
/* Setup the endpoint options and local addresses to bind to. */ /* Setup the endpoint options and local addresses to bind to. */
#if MXM_API < MXM_VERSION(1,5) #if MXM_API < MXM_VERSION(2,0)
ptl_bitmap = ompi_mtl_mxm.mxm_opts.ptl_bitmap; ptl_bitmap = ompi_mtl_mxm.mxm_ctx_opts->ptl_bitmap;
#elif MXM_API < MXM_VERSION(2,0)
ptl_bitmap = ompi_mtl_mxm.mxm_opts->ptl_bitmap;
#else #else
ptl_bitmap = 0; ptl_bitmap = 0;
#endif #endif
@ -488,6 +435,7 @@ int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
conn_reqs[i].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SELF]); conn_reqs[i].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SELF]);
conn_reqs[i].ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SHM]); conn_reqs[i].ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SHM]);
conn_reqs[i].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_RDMA]); conn_reqs[i].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_RDMA]);
#else #else
endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t); endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t);
endpoint->mtl_mxm_module = &ompi_mtl_mxm; endpoint->mtl_mxm_module = &ompi_mtl_mxm;
@ -504,13 +452,12 @@ int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
#if MXM_API < MXM_VERSION(2,0) #if MXM_API < MXM_VERSION(2,0)
/* Connect to remote peers */ /* Connect to remote peers */
timeout = (mxm_get_version() < MXM_VERSION(1,5)) ? 1000 : -1; err = mxm_ep_connect(ompi_mtl_mxm.ep, conn_reqs, nprocs, -1);
err = mxm_ep_connect(ompi_mtl_mxm.ep, conn_reqs, nprocs, timeout);
if (MXM_OK != err) { if (MXM_OK != err) {
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err)); MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
for (i = 0; i < nprocs; ++i) { for (i = 0; i < nprocs; ++i) {
if (MXM_OK != conn_reqs[i].error) { if (MXM_OK != conn_reqs[i].error) {
MXM_ERROR("MXM EP connect to %s error: %s\n", MXM_ERROR("MXM EP connect to %s error: %s\n",
(NULL == procs[i]->proc_hostname) ? (NULL == procs[i]->proc_hostname) ?
"unknown" : procs[i]->proc_hostname, "unknown" : procs[i]->proc_hostname,
mxm_error_string(conn_reqs[i].error)); mxm_error_string(conn_reqs[i].error));
@ -527,7 +474,9 @@ int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
endpoint->mxm_conn = conn_reqs[i].conn; endpoint->mxm_conn = conn_reqs[i].conn;
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint; procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
} }
#endif #endif
rc = OMPI_SUCCESS; rc = OMPI_SUCCESS;
bail: bail:
@ -601,10 +550,8 @@ static void ompi_mtl_mxm_mem_release_cb(void *buf, size_t length,
} }
#endif #endif
#if MXM_API >= MXM_VERSION(1,5)
OBJ_CLASS_INSTANCE( OBJ_CLASS_INSTANCE(
ompi_mtl_mxm_message_t, ompi_mtl_mxm_message_t,
ompi_free_list_item_t, ompi_free_list_item_t,
NULL, NULL,
NULL); NULL);
#endif

Просмотреть файл

@ -18,7 +18,12 @@
#ifndef MXM_VERSION #ifndef MXM_VERSION
#define MXM_VERSION(major, minor) (((major)<<MXM_MAJOR_BIT)|((minor)<<MXM_MINOR_BIT)) #define MXM_VERSION(major, minor) (((major)<<MXM_MAJOR_BIT)|((minor)<<MXM_MINOR_BIT))
#endif #endif
#if MXM_API < MXM_VERSION(2, 0)
#if MXM_API < MXM_VERSION(1,5)
#error "Unsupported MXM version, version 1.5 or above required"
#endif
#if MXM_API < MXM_VERSION(2,0)
#include <mxm/api/mxm_addr.h> #include <mxm/api/mxm_addr.h>
#endif #endif
@ -89,7 +94,6 @@ extern int ompi_mtl_mxm_finalize(struct mca_mtl_base_module_t* mtl);
int ompi_mtl_mxm_module_init(void); int ompi_mtl_mxm_module_init(void);
#if MXM_API >= MXM_VERSION(1,5)
struct ompi_mtl_mxm_message_t { struct ompi_mtl_mxm_message_t {
ompi_free_list_item_t super; ompi_free_list_item_t super;
@ -102,7 +106,6 @@ struct ompi_mtl_mxm_message_t {
}; };
typedef struct ompi_mtl_mxm_message_t ompi_mtl_mxm_message_t; typedef struct ompi_mtl_mxm_message_t ompi_mtl_mxm_message_t;
OBJ_CLASS_DECLARATION(ompi_mtl_mxm_message_t); OBJ_CLASS_DECLARATION(ompi_mtl_mxm_message_t);
#endif
END_C_DECLS END_C_DECLS

Просмотреть файл

@ -72,7 +72,7 @@ static int ompi_mtl_mxm_component_register(void)
MCA_BASE_VAR_SCOPE_LOCAL, MCA_BASE_VAR_SCOPE_LOCAL,
&ompi_mtl_mxm.verbose); &ompi_mtl_mxm.verbose);
#if MXM_API >= MXM_VERSION(2,0) #if MXM_API > MXM_VERSION(2,0)
ompi_mtl_mxm.mxm_np = 0; ompi_mtl_mxm.mxm_np = 0;
#else #else
ompi_mtl_mxm.mxm_np = 128; ompi_mtl_mxm.mxm_np = 128;
@ -92,6 +92,7 @@ static int ompi_mtl_mxm_component_open(void)
{ {
mxm_error_t err; mxm_error_t err;
unsigned long cur_ver; unsigned long cur_ver;
int rc;
mca_mtl_mxm_output = opal_output_open(NULL); mca_mtl_mxm_output = opal_output_open(NULL);
opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose); opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose);
@ -108,11 +109,6 @@ static int ompi_mtl_mxm_component_open(void)
} }
return OMPI_ERR_NOT_AVAILABLE; return OMPI_ERR_NOT_AVAILABLE;
} }
#if MXM_API < MXM_VERSION(1,5)
mxm_fill_context_opts(&ompi_mtl_mxm.mxm_opts);
err = mxm_init(&ompi_mtl_mxm.mxm_opts, &ompi_mtl_mxm.mxm_context);
MXM_VERBOSE(1, "mxm component open");
#else
#if MXM_API >= MXM_VERSION(2,0) #if MXM_API >= MXM_VERSION(2,0)
/* Register memory hooks */ /* Register memory hooks */
@ -130,15 +126,21 @@ static int ompi_mtl_mxm_component_open(void)
setenv("MXM_SINGLE_THREAD", ompi_mpi_thread_multiple ? "n" : "y" , 0); setenv("MXM_SINGLE_THREAD", ompi_mpi_thread_multiple ? "n" : "y" , 0);
#endif #endif
err = mxm_config_read_context_opts(&ompi_mtl_mxm.mxm_opts); #if MXM_API >= MXM_VERSION(2,1)
if (err != MXM_OK) { if (MXM_OK != mxm_config_read_opts(&ompi_mtl_mxm.mxm_ctx_opts,
&ompi_mtl_mxm.mxm_ep_opts,
"MPI", NULL, 0))
#else
if ((MXM_OK != mxm_config_read_context_opts(&ompi_mtl_mxm.mxm_ctx_opts)) ||
(MXM_OK != mxm_config_read_ep_opts(&ompi_mtl_mxm.mxm_ep_opts)))
#endif
{
MXM_ERROR("Failed to parse MXM configuration"); MXM_ERROR("Failed to parse MXM configuration");
return OPAL_ERR_BAD_PARAM; return OPAL_ERR_BAD_PARAM;
} }
err = mxm_init(ompi_mtl_mxm.mxm_opts, &ompi_mtl_mxm.mxm_context); err = mxm_init(ompi_mtl_mxm.mxm_ctx_opts, &ompi_mtl_mxm.mxm_context);
MXM_VERBOSE(1, "mxm component open"); MXM_VERBOSE(1, "mxm component open");
#endif
if (MXM_OK != err) { if (MXM_OK != err) {
if (MXM_ERR_NO_DEVICE == err) { if (MXM_ERR_NO_DEVICE == err) {
@ -150,10 +152,6 @@ static int ompi_mtl_mxm_component_open(void)
return OPAL_ERR_NOT_AVAILABLE; return OPAL_ERR_NOT_AVAILABLE;
} }
#if MXM_API >= MXM_VERSION(1,5)
{
int rc;
OBJ_CONSTRUCT(&mca_mtl_mxm_component.mxm_messages, ompi_free_list_t); OBJ_CONSTRUCT(&mca_mtl_mxm_component.mxm_messages, ompi_free_list_t);
rc = ompi_free_list_init_new(&mca_mtl_mxm_component.mxm_messages, rc = ompi_free_list_init_new(&mca_mtl_mxm_component.mxm_messages,
sizeof(ompi_mtl_mxm_message_t), sizeof(ompi_mtl_mxm_message_t),
@ -169,8 +167,6 @@ static int ompi_mtl_mxm_component_open(void)
mxm_error_string(err)); mxm_error_string(err));
return OPAL_ERR_NOT_AVAILABLE; return OPAL_ERR_NOT_AVAILABLE;
} }
}
#endif
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -183,9 +179,13 @@ static int ompi_mtl_mxm_component_close(void)
if ((cur_ver == MXM_API) && (ompi_mtl_mxm.mxm_context != NULL)) { if ((cur_ver == MXM_API) && (ompi_mtl_mxm.mxm_context != NULL)) {
mxm_cleanup(ompi_mtl_mxm.mxm_context); mxm_cleanup(ompi_mtl_mxm.mxm_context);
ompi_mtl_mxm.mxm_context = NULL; ompi_mtl_mxm.mxm_context = NULL;
#if MXM_API >= MXM_VERSION(1,5)
OBJ_DESTRUCT(&mca_mtl_mxm_component.mxm_messages); OBJ_DESTRUCT(&mca_mtl_mxm_component.mxm_messages);
#if MXM_API >= MXM_VERSION(2,0)
mxm_config_free_ep_opts(ompi_mtl_mxm.mxm_ep_opts);
mxm_config_free_context_opts(ompi_mtl_mxm.mxm_ctx_opts);
#else
mxm_config_free(ompi_mtl_mxm.mxm_ep_opts);
mxm_config_free(ompi_mtl_mxm.mxm_ctx_opts);
#endif #endif
} }

Просмотреть файл

@ -54,7 +54,6 @@ int ompi_mtl_mxm_improbe(struct mca_mtl_base_module_t *mtl,
struct ompi_message_t **message, struct ompi_message_t **message,
struct ompi_status_public_t *status) struct ompi_status_public_t *status)
{ {
#if MXM_API >= MXM_VERSION(1,5)
mxm_error_t err; mxm_error_t err;
mxm_recv_req_t req; mxm_recv_req_t req;
@ -110,7 +109,4 @@ int ompi_mtl_mxm_improbe(struct mca_mtl_base_module_t *mtl,
(*message)->count = status->_ucount; (*message)->count = status->_ucount;
return OMPI_SUCCESS; return OMPI_SUCCESS;
#else
return OMPI_ERR_NOT_IMPLEMENTED;
#endif
} }

Просмотреть файл

@ -111,12 +111,7 @@ static inline __opal_attribute_always_inline__ int
mxm_recv_req->base.flags = 0; mxm_recv_req->base.flags = 0;
#endif #endif
#if MXM_API < MXM_VERSION(1,5)
mxm_recv_req->base.data.buffer.mkey = MXM_MKEY_NONE;
#else
mxm_recv_req->base.data.buffer.memh = MXM_INVALID_MEM_HANDLE; mxm_recv_req->base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
#endif
mxm_recv_req->base.context = mtl_mxm_request; mxm_recv_req->base.context = mtl_mxm_request;
mxm_recv_req->base.completed_cb = ompi_mtl_mxm_recv_completion_cb; mxm_recv_req->base.completed_cb = ompi_mtl_mxm_recv_completion_cb;
@ -160,7 +155,6 @@ int ompi_mtl_mxm_imrecv(struct mca_mtl_base_module_t* mtl,
struct ompi_message_t **message, struct ompi_message_t **message,
struct mca_mtl_request_t *mtl_request) struct mca_mtl_request_t *mtl_request)
{ {
#if MXM_API >= MXM_VERSION(1,5)
int ret; int ret;
mxm_error_t err; mxm_error_t err;
mxm_recv_req_t *mxm_recv_req; mxm_recv_req_t *mxm_recv_req;
@ -197,7 +191,4 @@ int ompi_mtl_mxm_imrecv(struct mca_mtl_base_module_t* mtl,
(*message) = MPI_MESSAGE_NULL; (*message) = MPI_MESSAGE_NULL;
return OMPI_SUCCESS; return OMPI_SUCCESS;
#else
return OMPI_ERR_NOT_IMPLEMENTED;
#endif
} }

Просмотреть файл

@ -119,14 +119,11 @@ int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret; return ret;
} }
#if MXM_API < MXM_VERSION(1,5)
mxm_send_req.base.data.buffer.mkey = MXM_MKEY_NONE;
#else
mxm_send_req.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
#endif
mxm_send_req.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
mxm_send_req.op.send.tag = tag; mxm_send_req.op.send.tag = tag;
mxm_send_req.op.send.imm_data = ompi_comm_rank(comm); mxm_send_req.op.send.imm_data = ompi_comm_rank(comm);
#if MXM_API < MXM_VERSION(2,0) #if MXM_API < MXM_VERSION(2,0)
mxm_send_req.base.flags = MXM_REQ_FLAG_BLOCKING; mxm_send_req.base.flags = MXM_REQ_FLAG_BLOCKING;
mxm_send_req.opcode = MXM_REQ_OP_SEND; mxm_send_req.opcode = MXM_REQ_OP_SEND;
@ -193,11 +190,7 @@ int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
mtl_mxm_request->buf = mxm_send_req->base.data.buffer.ptr; mtl_mxm_request->buf = mxm_send_req->base.data.buffer.ptr;
mtl_mxm_request->length = mxm_send_req->base.data.buffer.length; mtl_mxm_request->length = mxm_send_req->base.data.buffer.length;
#if MXM_API < MXM_VERSION(1,5)
mxm_send_req->base.data.buffer.mkey = MXM_MKEY_NONE;
#else
mxm_send_req->base.data.buffer.memh = MXM_INVALID_MEM_HANDLE; mxm_send_req->base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
#endif
mxm_send_req->base.context = mtl_mxm_request; mxm_send_req->base.context = mtl_mxm_request;
mxm_send_req->base.completed_cb = ompi_mtl_mxm_send_completion_cb; mxm_send_req->base.completed_cb = ompi_mtl_mxm_send_completion_cb;

Просмотреть файл

@ -30,11 +30,8 @@ typedef struct mca_mtl_mxm_module_t {
int mxm_np; int mxm_np;
mxm_h mxm_context; mxm_h mxm_context;
mxm_ep_h ep; mxm_ep_h ep;
#if MXM_API < MXM_VERSION(1,5) mxm_context_opts_t *mxm_ctx_opts;
mxm_context_opts_t mxm_opts; mxm_ep_opts_t *mxm_ep_opts;
#else
mxm_context_opts_t *mxm_opts;
#endif
#if MXM_API >= MXM_VERSION(2,0) #if MXM_API >= MXM_VERSION(2,0)
int using_mem_hooks; int using_mem_hooks;
#endif #endif
@ -51,11 +48,7 @@ extern mca_mtl_mxm_module_t ompi_mtl_mxm;
typedef struct mca_mtl_mxm_component_t { typedef struct mca_mtl_mxm_component_t {
mca_mtl_base_component_2_0_0_t super; /**< base MTL component */ mca_mtl_base_component_2_0_0_t super; /**< base MTL component */
#if MXM_API >= MXM_VERSION(1,5)
ompi_free_list_t mxm_messages; /* will be used for MPI_Mprobe and MPI_Mrecv calls */ ompi_free_list_t mxm_messages; /* will be used for MPI_Mprobe and MPI_Mrecv calls */
#endif
} mca_mtl_mxm_component_t; } mca_mtl_mxm_component_t;