
code and readme updates, some refactoring

This commit was SVN r24977.
Mike Dubman 2011-08-02 14:30:11 +00:00
parent 8f4ac54336
commit 45ea375531
7 changed files with 74 additions and 85 deletions

NEWS

@@ -62,6 +62,7 @@ Trunk (not on release branches yet)
   OPAL levels - intended for use when configuring without MPI support
 - Modified paffinity system to provide warning when bindings result in
   being "bound to all", which is equivalent to "not bound"
+- Added Mellanox MTL layer implementation (mxm)
 1.5.3

README

@@ -509,6 +509,9 @@ Network Support
   or
   shell$ mpirun --mca pml cm ...
+- MXM MTL is an transport layer utilizing various Mellanox proprietary
+  technologies and providing better scalability and performance for large scale jobs
 - Myrinet MX (and Open-MX) support is shared between the 2 internal
   devices, the MTL and the BTL. The design of the BTL interface in
   Open MPI assumes that only naive one-sided communication
@@ -707,7 +710,7 @@ for a full list); a summary of the more commonly used ones follows:
 --with-mxm=<directory>
   Specify the directory where the Mellanox MXM library and
   header files are located. This option is generally only necessary
-  if the InfiniPath headers and libraries are not in default
+  if the MXM headers and libraries are not in default
   compiler/linker search paths.
   MXM is the support library for Mellanox network adapters.

ompi/mca/mtl/mxm/mtl_mxm_cancel.c

@@ -18,9 +18,9 @@ int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl,
     mxm_error_t err;
     mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
-    err = mxm_req_cancel(&mtl_mxm_request->mxm_request);
+    err = mxm_req_cancel(mtl_mxm_request->mxm_base_request);
     if (MXM_OK == err) {
-        err = mxm_req_test(&mtl_mxm_request->mxm_request);
+        err = mxm_req_test(mtl_mxm_request->mxm_base_request);
         if (MXM_OK == err) {
             mtl_request->ompi_req->req_status._cancelled = true;
             mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);

ompi/mca/mtl/mxm/mtl_mxm_probe.c

@@ -18,21 +18,21 @@ int ompi_mtl_mxm_iprobe(struct mca_mtl_base_module_t* mtl,
                         int *flag, struct ompi_status_public_t *status)
 {
     mxm_error_t err;
-    mxm_req_t req;
-    req.state = MXM_REQ_NEW;
-    req.mq = (mxm_mq_h)comm->c_pml_comm;
+    mxm_recv_req_t req;
+    req.base.state = MXM_REQ_NEW;
+    req.base.mq = (mxm_mq_h)comm->c_pml_comm;
     req.tag = tag;
     req.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
-    req.conn = (src == MPI_ANY_SOURCE) ? NULL : ompi_mtl_mxm_conn_lookup(comm, src);
+    req.base.conn = (src == MPI_ANY_SOURCE) ? NULL : ompi_mtl_mxm_conn_lookup(comm, src);
     err = mxm_req_probe(&req);
     if (MXM_OK == err) {
         *flag = 1;
         if (MPI_STATUS_IGNORE != status) {
-            status->MPI_SOURCE = *(int *)mxm_conn_get_context(req.conn);
+            status->MPI_SOURCE = *(int *)mxm_conn_get_context(req.base.conn);
             status->MPI_TAG = req.completion.sender_tag;
-            status->MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req.completion.status);
+            status->MPI_ERROR = ompi_mtl_mxm_to_mpi_status(err);
             status->_ucount = req.completion.actual_len;
         }
         return OMPI_SUCCESS;
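
Editorial note, not part of the commit: the probe path above shows the split request API this change moves to, where a receive request is an mxm_recv_req_t whose generic fields live under .base and whose matching fields (tag, tag_mask) sit at the top level. A minimal sketch of that pattern, using only names that appear in this diff; the wrapper function itself is hypothetical and assumes the surrounding mtl_mxm headers are included.

/* Hypothetical helper, for illustration only: returns 1 if a message matching
 * (src, tag) is pending on the communicator's MXM matching queue. */
static int example_mxm_probe(struct ompi_communicator_t *comm, int src, int tag)
{
    mxm_recv_req_t req;

    req.base.state = MXM_REQ_NEW;                    /* fresh request */
    req.base.mq    = (mxm_mq_h) comm->c_pml_comm;    /* MQ stored on the communicator */
    req.base.conn  = (src == MPI_ANY_SOURCE) ? NULL  /* NULL matches any source */
                     : ompi_mtl_mxm_conn_lookup(comm, src);
    req.tag        = tag;
    req.tag_mask   = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;  /* 0 matches any tag */

    return (MXM_OK == mxm_req_probe(&req)) ? 1 : 0;
}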

ompi/mca/mtl/mxm/mtl_mxm_recv.c

@@ -18,26 +18,27 @@
 #include "mtl_mxm_request.h"
-static void ompi_mtl_mxm_recv_completion_cb(mxm_req_t *req)
+static void ompi_mtl_mxm_recv_completion_cb(void *context)
 {
-    mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *) req->context;
-    struct ompi_request_t *ompi_req = mtl_mxm_request->super.ompi_req;
+    mca_mtl_mxm_request_t *req = (mca_mtl_mxm_request_t *) context;
+    struct ompi_request_t *ompi_req = req->super.ompi_req;
+    mxm_recv_req_t *mxm_recv_req = (mxm_recv_req_t *)req->mxm_base_request;
     /* Set completion status and envelope */
-    ompi_req->req_status.MPI_TAG = req->completion.sender_tag;
-    ompi_req->req_status.MPI_SOURCE = req->completion.sender_imm;
-    ompi_req->req_status.MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req->completion.status);
-    ompi_req->req_status._ucount = req->completion.actual_len;
+    ompi_req->req_status.MPI_TAG = mxm_recv_req->completion.sender_tag;
+    ompi_req->req_status.MPI_SOURCE = mxm_recv_req->completion.sender_imm;
+    ompi_req->req_status.MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req->mxm_base_request->error);
+    ompi_req->req_status._ucount = mxm_recv_req->completion.actual_len;
     /* Copy data */
-    ompi_mtl_datatype_unpack(mtl_mxm_request->convertor, mtl_mxm_request->buf,
-                             req->completion.actual_len);
-    if (mtl_mxm_request->free_after) {
-        free(mtl_mxm_request->buf);
+    ompi_mtl_datatype_unpack(req->convertor, req->buf,
+                             mxm_recv_req->completion.actual_len);
+    if (req->free_after) {
+        free(req->buf);
     }
-    mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
+    req->super.completion_callback(&req->super);
 }
@@ -47,9 +48,8 @@ int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
                        struct mca_mtl_request_t *mtl_request)
 {
     mca_mtl_mxm_request_t * mtl_mxm_request;
-    mca_mtl_mxm_endpoint_t* mxm_endpoint;
-    ompi_proc_t* ompi_proc;
     mxm_error_t err;
+    mxm_recv_req_t *mxm_recv_req;
     int ret;
     mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
@@ -63,22 +63,22 @@ int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
         return ret;
     }
-    /* prepare a receive request embedded in the MTL request */
-    mtl_mxm_request->mxm_request.state = MXM_REQ_NEW;
-    mtl_mxm_request->mxm_request.mq = (mxm_mq_h)comm->c_pml_comm;
-    mtl_mxm_request->mxm_request.tag = tag;
-    mtl_mxm_request->mxm_request.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
-    mtl_mxm_request->mxm_request.conn = (src == MPI_ANY_SOURCE) ? NULL :
-        ompi_mtl_mxm_conn_lookup(comm, src);
-    mtl_mxm_request->mxm_request.data.buf.ptr = mtl_mxm_request->buf;
-    mtl_mxm_request->mxm_request.data.buf.len = mtl_mxm_request->length;
-    mtl_mxm_request->mxm_request.completed_cb = ompi_mtl_mxm_recv_completion_cb;
-    mtl_mxm_request->mxm_request.context = mtl_mxm_request;
-    mtl_mxm_request->mxm_request.flags = MXM_REQ_FLAG_NONBLOCK;
+    mxm_recv_req = (mxm_recv_req_t *)mtl_mxm_request->mxm_base_request;
+    /* prepare a receive request embedded in the MTL request */
+    mxm_recv_req->base.state = MXM_REQ_NEW;
+    mxm_recv_req->base.mq = (mxm_mq_h)comm->c_pml_comm;
+    mxm_recv_req->tag = tag;
+    mxm_recv_req->tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
+    mxm_recv_req->base.conn = (src == MPI_ANY_SOURCE) ? NULL : ompi_mtl_mxm_conn_lookup(comm, src);
+    mxm_recv_req->base.data.buffer.ptr = mtl_mxm_request->buf;
+    mxm_recv_req->base.data.buffer.length = mtl_mxm_request->length;
+    mxm_recv_req->base.completed_cb = ompi_mtl_mxm_recv_completion_cb;
+    mxm_recv_req->base.context = mtl_mxm_request;
     /* post-recv */
-    err = mxm_req_recv(&mtl_mxm_request->mxm_request);
+    err = mxm_req_recv(mxm_recv_req);
     if (MXM_OK != err) {
         orte_show_help("help-mtl-mxm.txt", "error posting receive", true,
                        mxm_error_string(err), mtl_mxm_request->buf, mtl_mxm_request->length);
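
Editorial note, not part of the commit: the callback convention changes here from receiving the MXM request pointer to receiving the opaque pointer registered in base.context. A minimal sketch of that convention, using only fields visible in this diff; the callback name is hypothetical, and it only fills in the envelope (the real callback above also sets MPI_ERROR and unpacks the data).

/* Hypothetical callback, for illustration only: recover the MTL request from
 * the registered context, then read the envelope from the MXM receive request. */
static void example_recv_completion_cb(void *context)
{
    mca_mtl_mxm_request_t *mtl_req = (mca_mtl_mxm_request_t *) context;
    mxm_recv_req_t *rreq = (mxm_recv_req_t *) mtl_req->mxm_base_request;

    /* sender tag/rank and received length come from the MXM completion info */
    mtl_req->super.ompi_req->req_status.MPI_TAG    = rreq->completion.sender_tag;
    mtl_req->super.ompi_req->req_status.MPI_SOURCE = rreq->completion.sender_imm;
    mtl_req->super.ompi_req->req_status._ucount    = rreq->completion.actual_len;

    /* hand the completed request back to the MTL layer */
    mtl_req->super.completion_callback(&mtl_req->super);
}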

ompi/mca/mtl/mxm/mtl_mxm_request.h

@@ -16,7 +16,7 @@
 struct mca_mtl_mxm_request_t {
     struct mca_mtl_request_t super;
-    mxm_req_t mxm_request;
+    mxm_req_base_t *mxm_base_request;
     /* mxm_segment_t mxm_segment[1]; */
     void *buf;
     size_t length;
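
Editorial note, not part of the commit: with this change the MTL request no longer embeds an mxm_req_t; it keeps only a pointer to the MXM base request, and the send/receive paths cast it to the concrete type, as the irecv/isend hunks in this commit do. A small sketch of that idiom; the helper names are hypothetical, and it assumes the base request is the leading member of the concrete MXM request, which is what the casts in the new code rely on.

/* Hypothetical accessors, for illustration only: cast the stored base request
 * to the concrete MXM receive/send request, mirroring the new irecv/isend code. */
static inline mxm_recv_req_t *mtl_mxm_recv_req(mca_mtl_mxm_request_t *req)
{
    return (mxm_recv_req_t *) req->mxm_base_request;
}

static inline mxm_send_req_t *mtl_mxm_send_req(mca_mtl_mxm_request_t *req)
{
    return (mxm_send_req_t *) req->mxm_base_request;
}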

ompi/mca/mtl/mxm/mtl_mxm_send.c

@@ -17,30 +17,15 @@
 #include "ompi/mca/mtl/base/mtl_base_datatype.h"
-static void ompi_mtl_mxm_send_completion_cb(mxm_req_t *req)
+static void ompi_mtl_mxm_send_completion_cb(void *context)
 {
-    mca_mtl_mxm_request_t *mtl_mxm_request;
-    mtl_mxm_request = (mca_mtl_mxm_request_t *) req->context;
+    mca_mtl_mxm_request_t *mtl_mxm_request = context;
     if (mtl_mxm_request->free_after) {
         free(mtl_mxm_request->buf);
     }
-    switch (req->completion.status) {
-    case MXM_OK:
-        mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR
-            = OMPI_SUCCESS;
-        break;
-    case MXM_ERR_MESSAGE_TRUNCATED:
-        mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR
-            = MPI_ERR_TRUNCATE;
-        break;
-    default:
-        mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR
-            = MPI_ERR_INTERN;
-        break;
-    }
+    mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR = ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm_base_request->error);
     mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
 }
@@ -50,41 +35,38 @@ int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
                       struct opal_convertor_t *convertor,
                       mca_pml_base_send_mode_t mode)
 {
-    mxm_req_t mxm_req;
+    mxm_send_req_t mxm_send_req;
     bool free_after;
     mxm_error_t err;
     int ret;
     /* prepare local send request */
-    mxm_req.state = MXM_REQ_NEW;
-    mxm_req.mq = ompi_mtl_mxm_mq_lookup(comm);
-    mxm_req.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
-    mxm_req.tag = tag;
-    mxm_req.imm_data = ompi_comm_rank(comm);
-    mxm_req.completed_cb = NULL;
-    mxm_req.flags = 0;
+    mxm_send_req.base.state = MXM_REQ_NEW;
+    mxm_send_req.base.mq = ompi_mtl_mxm_mq_lookup(comm);
+    mxm_send_req.base.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
+    mxm_send_req.op.send.tag = tag;
+    mxm_send_req.op.send.imm_data = ompi_comm_rank(comm);
+    mxm_send_req.base.completed_cb = NULL;
+    mxm_send_req.base.flags = MXM_REQ_FLAG_WAIT;
     if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
-        mxm_req.flags |= MXM_REQ_FLAG_SEND_SYNC;
+        mxm_send_req.base.flags |= MXM_REQ_FLAG_SEND_SYNC;
     }
-    ret = ompi_mtl_datatype_pack(convertor, &mxm_req.data.buf.ptr, &mxm_req.data.buf.len,
+    ret = ompi_mtl_datatype_pack(convertor, &mxm_send_req.base.data.buffer.ptr, &mxm_send_req.base.data.buffer.length,
                                  &free_after);
     if (OMPI_SUCCESS != ret) {
         return ret;
     }
     /* post-send */
-    err = mxm_req_send(&mxm_req);
+    err = mxm_req_send(&mxm_send_req);
     if (MXM_OK != err) {
         orte_show_help("help-mtl-mxm.txt", "error posting send", true, 0, mxm_error_string(err));
         return OMPI_ERROR;
     }
     /* wait for request completion */
-    err = mxm_req_wait(&mxm_req);
-    if (MXM_OK != err) {
-        orte_show_help("help-mtl-mxm.txt", "error while waiting in send", true, mxm_error_string(err));
-        return OMPI_ERROR;
-    }
+    mxm_req_wait(&mxm_send_req.base);
     return OMPI_SUCCESS;
 }
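
Editorial note, not part of the commit: the blocking send above now builds an mxm_send_req_t (tag and immediate data under op.send), sets MXM_REQ_FLAG_WAIT on the base request, and waits with mxm_req_wait() on the base request instead of checking a second error code. A condensed sketch of that pattern, using only names from this diff; the wrapper function and its buffer arguments are hypothetical.

/* Hypothetical wrapper, for illustration only: send a pre-packed buffer to
 * `dest` on `comm` and block until MXM reports completion of the send. */
static int example_blocking_send(struct ompi_communicator_t *comm, int dest,
                                 int tag, void *buf, size_t len)
{
    mxm_send_req_t sreq;

    sreq.base.state        = MXM_REQ_NEW;
    sreq.base.mq           = ompi_mtl_mxm_mq_lookup(comm);
    sreq.base.conn         = ompi_mtl_mxm_conn_lookup(comm, dest);
    sreq.base.flags        = MXM_REQ_FLAG_WAIT;      /* as in the blocking send above */
    sreq.base.completed_cb = NULL;                   /* no callback on the blocking path */
    sreq.base.data.buffer.ptr    = buf;
    sreq.base.data.buffer.length = len;
    sreq.op.send.tag       = tag;
    sreq.op.send.imm_data  = ompi_comm_rank(comm);   /* sender rank as immediate data */

    if (MXM_OK != mxm_req_send(&sreq)) {
        return OMPI_ERROR;
    }
    mxm_req_wait(&sreq.base);                        /* block until the send completes */
    return OMPI_SUCCESS;
}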
@@ -96,6 +78,7 @@ int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
                        mca_mtl_request_t * mtl_request)
 {
     mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *)mtl_request;
+    mxm_send_req_t *mxm_send_req;
     mxm_error_t err;
     int ret;
@@ -110,23 +93,25 @@ int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
         return ret;
     }
+    mxm_send_req = (mxm_send_req_t *) mtl_mxm_request->mxm_base_request;
     /* prepare a send request embedded in the MTL request */
-    mtl_mxm_request->mxm_request.state = MXM_REQ_NEW;
-    mtl_mxm_request->mxm_request.mq = ompi_mtl_mxm_mq_lookup(comm);
-    mtl_mxm_request->mxm_request.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
-    mtl_mxm_request->mxm_request.tag = tag;
-    mtl_mxm_request->mxm_request.imm_data = ompi_comm_rank(comm);
-    mtl_mxm_request->mxm_request.data.buf.ptr = mtl_mxm_request->buf;
-    mtl_mxm_request->mxm_request.data.buf.len = mtl_mxm_request->length;
-    mtl_mxm_request->mxm_request.completed_cb = ompi_mtl_mxm_send_completion_cb;
-    mtl_mxm_request->mxm_request.context = mtl_mxm_request;
-    mtl_mxm_request->mxm_request.flags = MXM_REQ_FLAG_NONBLOCK;
+    mxm_send_req->base.state = MXM_REQ_NEW;
+    mxm_send_req->base.mq = ompi_mtl_mxm_mq_lookup(comm);
+    mxm_send_req->base.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
+    mxm_send_req->op.send.tag = tag;
+    mxm_send_req->op.send.imm_data = ompi_comm_rank(comm);
+    mxm_send_req->base.data.buffer.ptr = mtl_mxm_request->buf;
+    mxm_send_req->base.data.buffer.length = mtl_mxm_request->length;
+    mxm_send_req->base.completed_cb = ompi_mtl_mxm_send_completion_cb;
+    mxm_send_req->base.context = mtl_mxm_request;
     if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
-        mtl_mxm_request->mxm_request.flags |= MXM_REQ_FLAG_SEND_SYNC;
+        mxm_send_req->base.flags |= MXM_REQ_FLAG_SEND_SYNC;
     }
     /* post-send */
-    err = mxm_req_send(&mtl_mxm_request->mxm_request);
+    err = mxm_req_send(mxm_send_req);
     if (MXM_OK != err) {
         orte_show_help("help-mtl-mxm.txt", "error posting send", true, 1, mxm_error_string(err));
         return OMPI_ERROR;