code and readme updates, some refactoring
This commit was SVN r24977.
This commit is contained in:
parent 8f4ac54336
commit 45ea375531
NEWS   | 1 +
@@ -62,6 +62,7 @@ Trunk (not on release branches yet)
   OPAL levels - intended for use when configuring without MPI support
 - Modified paffinity system to provide warning when bindings result in
   being "bound to all", which is equivalent to "not bound"
+- Added Mellanox MTL layer implementation (mxm)
 
 
 1.5.3
README | 5 ++++-
@@ -509,6 +509,9 @@ Network Support
   or
   shell$ mpirun --mca pml cm ...
 
+- MXM MTL is a transport layer utilizing various Mellanox proprietary
+  technologies, providing better scalability and performance for large-scale jobs
+
 - Myrinet MX (and Open-MX) support is shared between the 2 internal
   devices, the MTL and the BTL. The design of the BTL interface in
   Open MPI assumes that only naive one-sided communication
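As an illustrative sketch only (the explicit MTL selection below is an assumption based on the standard MCA parameter mechanism, not text from this README change), the new mxm component could also be requested directly alongside the cm PML:

  shell$ mpirun --mca pml cm --mca mtl mxm ...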
@@ -707,7 +710,7 @@ for a full list); a summary of the more commonly used ones follows:
 --with-mxm=<directory>
   Specify the directory where the Mellanox MXM library and
   header files are located. This option is generally only necessary
-  if the InfiniPath headers and libraries are not in default
+  if the MXM headers and libraries are not in default
   compiler/linker search paths.
 
   MXM is the support library for Mellanox network adapters.
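As an illustrative sketch, assuming MXM is installed under /opt/mellanox/mxm (an example path, not a default), a build against it might be configured as:

  shell$ ./configure --with-mxm=/opt/mellanox/mxm ...
  shell$ make all install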
@@ -18,9 +18,9 @@ int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl,
     mxm_error_t err;
     mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
 
-    err = mxm_req_cancel(&mtl_mxm_request->mxm_request);
+    err = mxm_req_cancel(mtl_mxm_request->mxm_base_request);
     if (MXM_OK == err) {
-        err = mxm_req_test(&mtl_mxm_request->mxm_request);
+        err = mxm_req_test(mtl_mxm_request->mxm_base_request);
         if (MXM_OK == err) {
             mtl_request->ompi_req->req_status._cancelled = true;
             mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
@@ -18,21 +18,21 @@ int ompi_mtl_mxm_iprobe(struct mca_mtl_base_module_t* mtl,
                         int *flag, struct ompi_status_public_t *status)
 {
     mxm_error_t err;
-    mxm_req_t req;
+    mxm_recv_req_t req;
 
-    req.state = MXM_REQ_NEW;
-    req.mq = (mxm_mq_h)comm->c_pml_comm;
-    req.tag = tag;
-    req.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
-    req.conn = (src == MPI_ANY_SOURCE) ? NULL : ompi_mtl_mxm_conn_lookup(comm, src);
+    req.base.state = MXM_REQ_NEW;
+    req.base.mq = (mxm_mq_h)comm->c_pml_comm;
+    req.tag = tag;
+    req.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
+    req.base.conn = (src == MPI_ANY_SOURCE) ? NULL : ompi_mtl_mxm_conn_lookup(comm, src);
 
     err = mxm_req_probe(&req);
     if (MXM_OK == err) {
         *flag = 1;
         if (MPI_STATUS_IGNORE != status) {
-            status->MPI_SOURCE = *(int *)mxm_conn_get_context(req.conn);
+            status->MPI_SOURCE = *(int *)mxm_conn_get_context(req.base.conn);
             status->MPI_TAG = req.completion.sender_tag;
-            status->MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req.completion.status);
+            status->MPI_ERROR = ompi_mtl_mxm_to_mpi_status(err);
             status->_ucount = req.completion.actual_len;
         }
         return OMPI_SUCCESS;
@@ -18,26 +18,27 @@
 #include "mtl_mxm_request.h"
 
 
-static void ompi_mtl_mxm_recv_completion_cb(mxm_req_t *req)
+static void ompi_mtl_mxm_recv_completion_cb(void *context)
 {
-    mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *) req->context;
-    struct ompi_request_t *ompi_req = mtl_mxm_request->super.ompi_req;
+    mca_mtl_mxm_request_t *req = (mca_mtl_mxm_request_t *) context;
+    struct ompi_request_t *ompi_req = req->super.ompi_req;
+    mxm_recv_req_t *mxm_recv_req = (mxm_recv_req_t *)req->mxm_base_request;
 
     /* Set completion status and envelope */
-    ompi_req->req_status.MPI_TAG = req->completion.sender_tag;
-    ompi_req->req_status.MPI_SOURCE = req->completion.sender_imm;
-    ompi_req->req_status.MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req->completion.status);
-    ompi_req->req_status._ucount = req->completion.actual_len;
+    ompi_req->req_status.MPI_TAG = mxm_recv_req->completion.sender_tag;
+    ompi_req->req_status.MPI_SOURCE = mxm_recv_req->completion.sender_imm;
+    ompi_req->req_status.MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req->mxm_base_request->error);
+    ompi_req->req_status._ucount = mxm_recv_req->completion.actual_len;
 
     /* Copy data */
-    ompi_mtl_datatype_unpack(mtl_mxm_request->convertor, mtl_mxm_request->buf,
-                             req->completion.actual_len);
+    ompi_mtl_datatype_unpack(req->convertor, req->buf,
+                             mxm_recv_req->completion.actual_len);
 
-    if (mtl_mxm_request->free_after) {
-        free(mtl_mxm_request->buf);
+    if (req->free_after) {
+        free(req->buf);
     }
 
-    mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
+    req->super.completion_callback(&req->super);
 }
 
 
@@ -47,9 +48,8 @@ int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
                        struct mca_mtl_request_t *mtl_request)
 {
     mca_mtl_mxm_request_t * mtl_mxm_request;
-    mca_mtl_mxm_endpoint_t* mxm_endpoint;
-    ompi_proc_t* ompi_proc;
     mxm_error_t err;
+    mxm_recv_req_t *mxm_recv_req;
     int ret;
 
     mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
@@ -63,22 +63,22 @@ int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
         return ret;
     }
 
-    /* prepare a receive request embedded in the MTL request */
-    mtl_mxm_request->mxm_request.state = MXM_REQ_NEW;
-    mtl_mxm_request->mxm_request.mq = (mxm_mq_h)comm->c_pml_comm;
-    mtl_mxm_request->mxm_request.tag = tag;
-    mtl_mxm_request->mxm_request.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
-    mtl_mxm_request->mxm_request.conn = (src == MPI_ANY_SOURCE) ? NULL :
-                                        ompi_mtl_mxm_conn_lookup(comm, src);
+    mxm_recv_req = (mxm_recv_req_t *)mtl_mxm_request->mxm_base_request;
 
-    mtl_mxm_request->mxm_request.data.buf.ptr = mtl_mxm_request->buf;
-    mtl_mxm_request->mxm_request.data.buf.len = mtl_mxm_request->length;
-    mtl_mxm_request->mxm_request.completed_cb = ompi_mtl_mxm_recv_completion_cb;
-    mtl_mxm_request->mxm_request.context = mtl_mxm_request;
-    mtl_mxm_request->mxm_request.flags = MXM_REQ_FLAG_NONBLOCK;
+    /* prepare a receive request embedded in the MTL request */
+    mxm_recv_req->base.state = MXM_REQ_NEW;
+    mxm_recv_req->base.mq = (mxm_mq_h)comm->c_pml_comm;
+    mxm_recv_req->tag = tag;
+    mxm_recv_req->tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
+    mxm_recv_req->base.conn = (src == MPI_ANY_SOURCE) ? NULL : ompi_mtl_mxm_conn_lookup(comm, src);
+
+    mxm_recv_req->base.data.buffer.ptr = mtl_mxm_request->buf;
+    mxm_recv_req->base.data.buffer.length = mtl_mxm_request->length;
+    mxm_recv_req->base.completed_cb = ompi_mtl_mxm_recv_completion_cb;
+    mxm_recv_req->base.context = mtl_mxm_request;
 
     /* post-recv */
-    err = mxm_req_recv(&mtl_mxm_request->mxm_request);
+    err = mxm_req_recv(mxm_recv_req);
     if (MXM_OK != err) {
         orte_show_help("help-mtl-mxm.txt", "error posting receive", true,
                        mxm_error_string(err), mtl_mxm_request->buf, mtl_mxm_request->length);
@@ -16,7 +16,7 @@
 
 struct mca_mtl_mxm_request_t {
     struct mca_mtl_request_t super;
-    mxm_req_t mxm_request;
+    mxm_req_base_t *mxm_base_request;
     /* mxm_segment_t mxm_segment[1]; */
     void *buf;
     size_t length;
@@ -17,30 +17,15 @@
 #include "ompi/mca/mtl/base/mtl_base_datatype.h"
 
 
-static void ompi_mtl_mxm_send_completion_cb(mxm_req_t *req)
+static void ompi_mtl_mxm_send_completion_cb(void *context)
 {
-
-    mca_mtl_mxm_request_t *mtl_mxm_request;
-    mtl_mxm_request = (mca_mtl_mxm_request_t *) req->context;
+    mca_mtl_mxm_request_t *mtl_mxm_request = context;
 
     if (mtl_mxm_request->free_after) {
         free(mtl_mxm_request->buf);
     }
 
-    switch (req->completion.status) {
-    case MXM_OK:
-        mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR
-            = OMPI_SUCCESS;
-        break;
-    case MXM_ERR_MESSAGE_TRUNCATED:
-        mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR
-            = MPI_ERR_TRUNCATE;
-        break;
-    default:
-        mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR
-            = MPI_ERR_INTERN;
-        break;
-    }
+    mtl_mxm_request->super.ompi_req->req_status.MPI_ERROR = ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm_base_request->error);
 
     mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
 }
@@ -50,41 +35,38 @@ int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
                       struct opal_convertor_t *convertor,
                       mca_pml_base_send_mode_t mode)
 {
-    mxm_req_t mxm_req;
+    mxm_send_req_t mxm_send_req;
     bool free_after;
     mxm_error_t err;
     int ret;
 
     /* prepare local send request */
-    mxm_req.state = MXM_REQ_NEW;
-    mxm_req.mq = ompi_mtl_mxm_mq_lookup(comm);
-    mxm_req.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
-    mxm_req.tag = tag;
-    mxm_req.imm_data = ompi_comm_rank(comm);
-    mxm_req.completed_cb = NULL;
-    mxm_req.flags = 0;
+    mxm_send_req.base.state = MXM_REQ_NEW;
+    mxm_send_req.base.mq = ompi_mtl_mxm_mq_lookup(comm);
+    mxm_send_req.base.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
+    mxm_send_req.op.send.tag = tag;
+    mxm_send_req.op.send.imm_data = ompi_comm_rank(comm);
+    mxm_send_req.base.completed_cb = NULL;
+    mxm_send_req.base.flags = MXM_REQ_FLAG_WAIT;
 
     if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
-        mxm_req.flags |= MXM_REQ_FLAG_SEND_SYNC;
+        mxm_send_req.base.flags |= MXM_REQ_FLAG_SEND_SYNC;
     }
-    ret = ompi_mtl_datatype_pack(convertor, &mxm_req.data.buf.ptr, &mxm_req.data.buf.len,
+    ret = ompi_mtl_datatype_pack(convertor, &mxm_send_req.base.data.buffer.ptr, &mxm_send_req.base.data.buffer.length,
                                  &free_after);
     if (OMPI_SUCCESS != ret) {
         return ret;
     }
 
     /* post-send */
-    err = mxm_req_send(&mxm_req);
+    err = mxm_req_send(&mxm_send_req);
     if (MXM_OK != err) {
         orte_show_help("help-mtl-mxm.txt", "error posting send", true, 0, mxm_error_string(err));
         return OMPI_ERROR;
     }
 
     /* wait for request completion */
-    err = mxm_req_wait(&mxm_req);
-    if (MXM_OK != err) {
-        orte_show_help("help-mtl-mxm.txt", "error while waiting in send", true, mxm_error_string(err));
-        return OMPI_ERROR;
-    }
+    mxm_req_wait(&mxm_send_req.base);
 
     return OMPI_SUCCESS;
 }
@@ -96,6 +78,7 @@ int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
                        mca_mtl_request_t * mtl_request)
 {
     mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *)mtl_request;
+    mxm_send_req_t *mxm_send_req;
     mxm_error_t err;
     int ret;
 
@@ -110,23 +93,25 @@ int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
         return ret;
     }
 
+    mxm_send_req = (mxm_send_req_t *) mtl_mxm_request->mxm_base_request;
+
     /* prepare a send request embedded in the MTL request */
-    mtl_mxm_request->mxm_request.state = MXM_REQ_NEW;
-    mtl_mxm_request->mxm_request.mq = ompi_mtl_mxm_mq_lookup(comm);
-    mtl_mxm_request->mxm_request.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
-    mtl_mxm_request->mxm_request.tag = tag;
-    mtl_mxm_request->mxm_request.imm_data = ompi_comm_rank(comm);
-    mtl_mxm_request->mxm_request.data.buf.ptr = mtl_mxm_request->buf;
-    mtl_mxm_request->mxm_request.data.buf.len = mtl_mxm_request->length;
-    mtl_mxm_request->mxm_request.completed_cb = ompi_mtl_mxm_send_completion_cb;
-    mtl_mxm_request->mxm_request.context = mtl_mxm_request;
-    mtl_mxm_request->mxm_request.flags = MXM_REQ_FLAG_NONBLOCK;
+    mxm_send_req->base.state = MXM_REQ_NEW;
+    mxm_send_req->base.mq = ompi_mtl_mxm_mq_lookup(comm);
+    mxm_send_req->base.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
+    mxm_send_req->op.send.tag = tag;
+    mxm_send_req->op.send.imm_data = ompi_comm_rank(comm);
+    mxm_send_req->base.data.buffer.ptr = mtl_mxm_request->buf;
+    mxm_send_req->base.data.buffer.length = mtl_mxm_request->length;
+    mxm_send_req->base.completed_cb = ompi_mtl_mxm_send_completion_cb;
+    mxm_send_req->base.context = mtl_mxm_request;
 
     if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
-        mtl_mxm_request->mxm_request.flags |= MXM_REQ_FLAG_SEND_SYNC;
+        mxm_send_req->base.flags |= MXM_REQ_FLAG_SEND_SYNC;
     }
 
     /* post-send */
-    err = mxm_req_send(&mtl_mxm_request->mxm_request);
+    err = mxm_req_send(mxm_send_req);
     if (MXM_OK != err) {
         orte_show_help("help-mtl-mxm.txt", "error posting send", true, 1, mxm_error_string(err));
         return OMPI_ERROR;