From 45ea375531f17504b9ee696720ea2f410e639ea7 Mon Sep 17 00:00:00 2001 From: Mike Dubman Date: Tue, 2 Aug 2011 14:30:11 +0000 Subject: [PATCH] code and readme updates, some refactoring This commit was SVN r24977. --- NEWS | 1 + README | 5 +- ompi/mca/mtl/mxm/mtl_mxm_cancel.c | 4 +- ompi/mca/mtl/mxm/mtl_mxm_probe.c | 16 +++---- ompi/mca/mtl/mxm/mtl_mxm_recv.c | 54 ++++++++++----------- ompi/mca/mtl/mxm/mtl_mxm_request.h | 2 +- ompi/mca/mtl/mxm/mtl_mxm_send.c | 77 ++++++++++++------------------ 7 files changed, 74 insertions(+), 85 deletions(-) diff --git a/NEWS b/NEWS index cde599160f..9712682d74 100644 --- a/NEWS +++ b/NEWS @@ -62,6 +62,7 @@ Trunk (not on release branches yet) OPAL levels - intended for use when configuring without MPI support - Modified paffinity system to provide warning when bindings result in being "bound to all", which is equivalent to "not bound" +- Added Mellanox MTL layer implementation (mxm) 1.5.3 diff --git a/README b/README index 6dd237f1a8..42a106e184 100644 --- a/README +++ b/README @@ -509,6 +509,9 @@ Network Support or shell$ mpirun --mca pml cm ... +- MXM MTL is a transport layer utilizing various Mellanox proprietary + technologies and providing better scalability and performance for large scale jobs + - Myrinet MX (and Open-MX) support is shared between the 2 internal devices, the MTL and the BTL. The design of the BTL interface in Open MPI assumes that only naive one-sided communication @@ -707,7 +710,7 @@ for a full list); a summary of the more commonly used ones follows: --with-mxm= Specify the directory where the Mellanox MXM library and header files are located. This option is generally only necessary - if the InfiniPath headers and libraries are not in default + if the MXM headers and libraries are not in default compiler/linker search paths. MXM is the support library for Mellanox network adapters. 
diff --git a/ompi/mca/mtl/mxm/mtl_mxm_cancel.c b/ompi/mca/mtl/mxm/mtl_mxm_cancel.c index 5b14858b1c..3322e3a50e 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm_cancel.c +++ b/ompi/mca/mtl/mxm/mtl_mxm_cancel.c @@ -18,9 +18,9 @@ int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl, mxm_error_t err; mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request; - err = mxm_req_cancel(&mtl_mxm_request->mxm_request); + err = mxm_req_cancel(mtl_mxm_request->mxm_base_request); if (MXM_OK == err) { - err = mxm_req_test(&mtl_mxm_request->mxm_request); + err = mxm_req_test(mtl_mxm_request->mxm_base_request); if (MXM_OK == err) { mtl_request->ompi_req->req_status._cancelled = true; mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super); diff --git a/ompi/mca/mtl/mxm/mtl_mxm_probe.c b/ompi/mca/mtl/mxm/mtl_mxm_probe.c index 990d7c7699..dfd57ae1b1 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm_probe.c +++ b/ompi/mca/mtl/mxm/mtl_mxm_probe.c @@ -18,21 +18,21 @@ int ompi_mtl_mxm_iprobe(struct mca_mtl_base_module_t* mtl, int *flag, struct ompi_status_public_t *status) { mxm_error_t err; - mxm_req_t req; + mxm_recv_req_t req; - req.state = MXM_REQ_NEW; - req.mq = (mxm_mq_h)comm->c_pml_comm; - req.tag = tag; - req.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU; - req.conn = (src == MPI_ANY_SOURCE) ? NULL : ompi_mtl_mxm_conn_lookup(comm, src); + req.base.state = MXM_REQ_NEW; + req.base.mq = (mxm_mq_h)comm->c_pml_comm; + req.tag = tag; + req.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU; + req.base.conn = (src == MPI_ANY_SOURCE) ? 
NULL : ompi_mtl_mxm_conn_lookup(comm, src); err = mxm_req_probe(&req); if (MXM_OK == err) { *flag = 1; if (MPI_STATUS_IGNORE != status) { - status->MPI_SOURCE = *(int *)mxm_conn_get_context(req.conn); + status->MPI_SOURCE = *(int *)mxm_conn_get_context(req.base.conn); status->MPI_TAG = req.completion.sender_tag; - status->MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req.completion.status); + status->MPI_ERROR = ompi_mtl_mxm_to_mpi_status(err); status->_ucount = req.completion.actual_len; } return OMPI_SUCCESS; diff --git a/ompi/mca/mtl/mxm/mtl_mxm_recv.c b/ompi/mca/mtl/mxm/mtl_mxm_recv.c index 4ab59edafa..cb3d688db5 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm_recv.c +++ b/ompi/mca/mtl/mxm/mtl_mxm_recv.c @@ -18,26 +18,27 @@ #include "mtl_mxm_request.h" -static void ompi_mtl_mxm_recv_completion_cb(mxm_req_t *req) +static void ompi_mtl_mxm_recv_completion_cb(void *context) { - mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *) req->context; - struct ompi_request_t *ompi_req = mtl_mxm_request->super.ompi_req; + mca_mtl_mxm_request_t *req = (mca_mtl_mxm_request_t *) context; + struct ompi_request_t *ompi_req = req->super.ompi_req; + mxm_recv_req_t *mxm_recv_req = (mxm_recv_req_t *)req->mxm_base_request; /* Set completion status and envelope */ - ompi_req->req_status.MPI_TAG = req->completion.sender_tag; - ompi_req->req_status.MPI_SOURCE = req->completion.sender_imm; - ompi_req->req_status.MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req->completion.status); - ompi_req->req_status._ucount = req->completion.actual_len; + ompi_req->req_status.MPI_TAG = mxm_recv_req->completion.sender_tag; + ompi_req->req_status.MPI_SOURCE = mxm_recv_req->completion.sender_imm; + ompi_req->req_status.MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req->mxm_base_request->error); + ompi_req->req_status._ucount = mxm_recv_req->completion.actual_len; /* Copy data */ - ompi_mtl_datatype_unpack(mtl_mxm_request->convertor, mtl_mxm_request->buf, - req->completion.actual_len); + 
ompi_mtl_datatype_unpack(req->convertor, req->buf, + mxm_recv_req->completion.actual_len); - if (mtl_mxm_request->free_after) { - free(mtl_mxm_request->buf); + if (req->free_after) { + free(req->buf); } - mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super); + req->super.completion_callback(&req->super); } @@ -47,9 +48,8 @@ int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl, struct mca_mtl_request_t *mtl_request) { mca_mtl_mxm_request_t * mtl_mxm_request; - mca_mtl_mxm_endpoint_t* mxm_endpoint; - ompi_proc_t* ompi_proc; mxm_error_t err; + mxm_recv_req_t *mxm_recv_req; int ret; mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request; @@ -63,22 +63,22 @@ int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl, return ret; } - /* prepare a receive request embedded in the MTL request */ - mtl_mxm_request->mxm_request.state = MXM_REQ_NEW; - mtl_mxm_request->mxm_request.mq = (mxm_mq_h)comm->c_pml_comm; - mtl_mxm_request->mxm_request.tag = tag; - mtl_mxm_request->mxm_request.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU; - mtl_mxm_request->mxm_request.conn = (src == MPI_ANY_SOURCE) ? NULL : - ompi_mtl_mxm_conn_lookup(comm, src); + mxm_recv_req = (mxm_recv_req_t *)mtl_mxm_request->mxm_base_request; - mtl_mxm_request->mxm_request.data.buf.ptr = mtl_mxm_request->buf; - mtl_mxm_request->mxm_request.data.buf.len = mtl_mxm_request->length; - mtl_mxm_request->mxm_request.completed_cb = ompi_mtl_mxm_recv_completion_cb; - mtl_mxm_request->mxm_request.context = mtl_mxm_request; - mtl_mxm_request->mxm_request.flags = MXM_REQ_FLAG_NONBLOCK; + /* prepare a receive request embedded in the MTL request */ + mxm_recv_req->base.state = MXM_REQ_NEW; + mxm_recv_req->base.mq = (mxm_mq_h)comm->c_pml_comm; + mxm_recv_req->tag = tag; + mxm_recv_req->tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU; + mxm_recv_req->base.conn = (src == MPI_ANY_SOURCE) ? 
NULL : ompi_mtl_mxm_conn_lookup(comm, src); + + mxm_recv_req->base.data.buffer.ptr = mtl_mxm_request->buf; + mxm_recv_req->base.data.buffer.length = mtl_mxm_request->length; + mxm_recv_req->base.completed_cb = ompi_mtl_mxm_recv_completion_cb; + mxm_recv_req->base.context = mtl_mxm_request; /* post-recv */ - err = mxm_req_recv(&mtl_mxm_request->mxm_request); + err = mxm_req_recv(mxm_recv_req); if (MXM_OK != err) { orte_show_help("help-mtl-mxm.txt", "error posting receive", true, mxm_error_string(err), mtl_mxm_request->buf, mtl_mxm_request->length); diff --git a/ompi/mca/mtl/mxm/mtl_mxm_request.h b/ompi/mca/mtl/mxm/mtl_mxm_request.h index fcd70db680..4b6e4f9ced 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm_request.h +++ b/ompi/mca/mtl/mxm/mtl_mxm_request.h @@ -16,7 +16,7 @@ struct mca_mtl_mxm_request_t { struct mca_mtl_request_t super; - mxm_req_t mxm_request; + mxm_req_base_t *mxm_base_request; /* mxm_segment_t mxm_segment[1]; */ void *buf; size_t length; diff --git a/ompi/mca/mtl/mxm/mtl_mxm_send.c b/ompi/mca/mtl/mxm/mtl_mxm_send.c index ea3c68a89b..18f45a30f3 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm_send.c +++ b/ompi/mca/mtl/mxm/mtl_mxm_send.c @@ -17,30 +17,15 @@ #include "ompi/mca/mtl/base/mtl_base_datatype.h" -static void ompi_mtl_mxm_send_completion_cb(mxm_req_t *req) +static void ompi_mtl_mxm_send_completion_cb(void *context) { - - mca_mtl_mxm_request_t *mtl_mxm_request; - mtl_mxm_request = (mca_mtl_mxm_request_t *) req->context; + mca_mtl_mxm_request_t *mtl_mxm_request = context; if (mtl_mxm_request->free_after) { free(mtl_mxm_request->buf); } - switch (req->completion.status) { - case MXM_OK: - mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR - = OMPI_SUCCESS; - break; - case MXM_ERR_MESSAGE_TRUNCATED: - mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR - = MPI_ERR_TRUNCATE; - break; - default: - mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR - = MPI_ERR_INTERN; - break; - } + mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR = 
ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm_base_request->error); mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super); } @@ -50,41 +35,38 @@ int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl, struct opal_convertor_t *convertor, mca_pml_base_send_mode_t mode) { - mxm_req_t mxm_req; + mxm_send_req_t mxm_send_req; bool free_after; mxm_error_t err; int ret; /* prepare local send request */ - mxm_req.state = MXM_REQ_NEW; - mxm_req.mq = ompi_mtl_mxm_mq_lookup(comm); - mxm_req.conn = ompi_mtl_mxm_conn_lookup(comm, dest); - mxm_req.tag = tag; - mxm_req.imm_data = ompi_comm_rank(comm); - mxm_req.completed_cb = NULL; - mxm_req.flags = 0; + mxm_send_req.base.state = MXM_REQ_NEW; + mxm_send_req.base.mq = ompi_mtl_mxm_mq_lookup(comm); + mxm_send_req.base.conn = ompi_mtl_mxm_conn_lookup(comm, dest); + mxm_send_req.op.send.tag = tag; + mxm_send_req.op.send.imm_data = ompi_comm_rank(comm); + mxm_send_req.base.completed_cb = NULL; + mxm_send_req.base.flags = MXM_REQ_FLAG_WAIT; + if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) { - mxm_req.flags |= MXM_REQ_FLAG_SEND_SYNC; + mxm_send_req.base.flags |= MXM_REQ_FLAG_SEND_SYNC; } - ret = ompi_mtl_datatype_pack(convertor, &mxm_req.data.buf.ptr, &mxm_req.data.buf.len, + ret = ompi_mtl_datatype_pack(convertor, &mxm_send_req.base.data.buffer.ptr, &mxm_send_req.base.data.buffer.length, &free_after); if (OMPI_SUCCESS != ret) { return ret; } /* post-send */ - err = mxm_req_send(&mxm_req); + err = mxm_req_send(&mxm_send_req); if (MXM_OK != err) { orte_show_help("help-mtl-mxm.txt", "error posting send", true, 0, mxm_error_string(err)); return OMPI_ERROR; } /* wait for request completion */ - err = mxm_req_wait(&mxm_req); - if (MXM_OK != err) { - orte_show_help("help-mtl-mxm.txt", "error while waiting in send", true, mxm_error_string(err)); - return OMPI_ERROR; - } + mxm_req_wait(&mxm_send_req.base); return OMPI_SUCCESS; } @@ -96,6 +78,7 @@ int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl, mca_mtl_request_t * 
mtl_request) { mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *)mtl_request; + mxm_send_req_t *mxm_send_req; mxm_error_t err; int ret; @@ -110,23 +93,25 @@ int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl, return ret; } + mxm_send_req = (mxm_send_req_t *) mtl_mxm_request->mxm_base_request; + /* prepare a send request embedded in the MTL request */ - mtl_mxm_request->mxm_request.state = MXM_REQ_NEW; - mtl_mxm_request->mxm_request.mq = ompi_mtl_mxm_mq_lookup(comm); - mtl_mxm_request->mxm_request.conn = ompi_mtl_mxm_conn_lookup(comm, dest); - mtl_mxm_request->mxm_request.tag = tag; - mtl_mxm_request->mxm_request.imm_data = ompi_comm_rank(comm); - mtl_mxm_request->mxm_request.data.buf.ptr = mtl_mxm_request->buf; - mtl_mxm_request->mxm_request.data.buf.len = mtl_mxm_request->length; - mtl_mxm_request->mxm_request.completed_cb = ompi_mtl_mxm_send_completion_cb; - mtl_mxm_request->mxm_request.context = mtl_mxm_request; - mtl_mxm_request->mxm_request.flags = MXM_REQ_FLAG_NONBLOCK; + mxm_send_req->base.state = MXM_REQ_NEW; + mxm_send_req->base.mq = ompi_mtl_mxm_mq_lookup(comm); + mxm_send_req->base.conn = ompi_mtl_mxm_conn_lookup(comm, dest); + mxm_send_req->op.send.tag = tag; + mxm_send_req->op.send.imm_data = ompi_comm_rank(comm); + mxm_send_req->base.data.buffer.ptr = mtl_mxm_request->buf; + mxm_send_req->base.data.buffer.length = mtl_mxm_request->length; + mxm_send_req->base.completed_cb = ompi_mtl_mxm_send_completion_cb; + mxm_send_req->base.context = mtl_mxm_request; + if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) { - mtl_mxm_request->mxm_request.flags |= MXM_REQ_FLAG_SEND_SYNC; + mxm_send_req->base.flags |= MXM_REQ_FLAG_SEND_SYNC; } /* post-send */ - err = mxm_req_send(&mtl_mxm_request->mxm_request); + err = mxm_req_send(mxm_send_req); if (MXM_OK != err) { orte_show_help("help-mtl-mxm.txt", "error posting send", true, 1, mxm_error_string(err)); return OMPI_ERROR;