initial implementation of MXM MTL layer
This commit was SVN r24946.
Этот коммит содержится в:
родитель
715f871605
Коммит
aefffa073d
17
README
17
README
@ -140,6 +140,7 @@ Detailed Open MPI v1.5 Feature List:
|
||||
process)
|
||||
- Many other small improvements and bug fixes, too numerous to
|
||||
list here
|
||||
- Mellanox MXM MTL layer implementation
|
||||
|
||||
Known issues
|
||||
------------
|
||||
@ -493,10 +494,11 @@ Network Support
|
||||
performance:
|
||||
- Myrinet MX (including Open-MX, but not GM)
|
||||
- InfiniPath PSM
|
||||
- Mellanox MXM
|
||||
- Portals
|
||||
|
||||
Open MPI will, by default, choose to use "cm" when the InfiniPath
|
||||
PSM MTL can be used. Otherwise, "ob1" will be used and the
|
||||
PSM or Mellanox MXM MTL can be used. Otherwise, "ob1" will be used and the
|
||||
corresponding BTLs will be selected. "csum" will never be selected
|
||||
by default. Users can force the use of ob1 or cm if desired by
|
||||
setting the "pml" MCA parameter at run-time:
|
||||
@ -702,6 +704,19 @@ for a full list); a summary of the more commonly used ones follows:
|
||||
look in <psm directory>/lib and <psm directory>/lib64, which covers
|
||||
most cases. This option is only needed for special configurations.
|
||||
|
||||
--with-mxm=<directory>
|
||||
Specify the directory where the Mellanox MXM library and
|
||||
header files are located. This option is generally only necessary
|
||||
if the Mellanox MXM headers and libraries are not in default
|
||||
compiler/linker search paths.
|
||||
|
||||
MXM is the support library for Mellanox network adapters.
|
||||
|
||||
--with-mxm-libdir=<directory>
|
||||
Look in directory for the MXM libraries. By default, Open MPI will
|
||||
look in <mxm directory>/lib and <mxm directory>/lib64, which covers
|
||||
most cases. This option is only needed for special configurations.
|
||||
|
||||
--with-sctp=<directory>
|
||||
Specify the directory where the SCTP libraries and header files are
|
||||
located. This option is generally only necessary if the SCTP headers
|
||||
|
57
ompi/config/ompi_check_mxm.m4
Обычный файл
57
ompi/config/ompi_check_mxm.m4
Обычный файл
@ -0,0 +1,57 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# OMPI_CHECK_MXM(prefix, [action-if-found], [action-if-not-found])
|
||||
# --------------------------------------------------------
|
||||
# check if MXM support can be found. sets prefix_{CPPFLAGS,
|
||||
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
|
||||
# support, otherwise executes action-if-not-found
|
||||
AC_DEFUN([OMPI_CHECK_MXM],[
    dnl Command line switches: MXM install prefix and optional library directory.
    AC_ARG_WITH([mxm],
        [AC_HELP_STRING([--with-mxm(=DIR)],
             [Build Mellanox Messaging support])])
    OMPI_CHECK_WITHDIR([mxm], [$with_mxm], [include/mxm/api/mxm_api.h])
    AC_ARG_WITH([mxm-libdir],
        [AC_HELP_STRING([--with-mxm-libdir=DIR],
             [Search for Mellanox Messaging libraries in DIR])])
    OMPI_CHECK_WITHDIR([mxm-libdir], [$with_mxm_libdir], [libmxm.*])

    dnl Remember the global flags so they can be put back after the probe.
    ompi_check_mxm_$1_save_CPPFLAGS="$CPPFLAGS"
    ompi_check_mxm_$1_save_LDFLAGS="$LDFLAGS"
    ompi_check_mxm_$1_save_LIBS="$LIBS"

    dnl Probe for the header and library unless MXM was explicitly disabled.
    AS_IF([test "$with_mxm" != "no"],
          [AS_IF([test -n "$with_mxm" && test "$with_mxm" != "yes"],
                 [ompi_check_mxm_dir="$with_mxm"])
           AS_IF([test -n "$with_mxm_libdir" && test "$with_mxm_libdir" != "yes"],
                 [ompi_check_mxm_libdir="$with_mxm_libdir"])

           OMPI_CHECK_PACKAGE([$1],
                              [mxm/api/mxm_api.h],
                              [mxm],
                              [mxm_cleanup],
                              [],
                              [$ompi_check_mxm_dir],
                              [$ompi_check_mxm_libdir],
                              [ompi_check_mxm_happy="yes"],
                              [ompi_check_mxm_happy="no"])],
          [ompi_check_mxm_happy="no"])

    dnl Put the saved global flags back.
    CPPFLAGS="$ompi_check_mxm_$1_save_CPPFLAGS"
    LDFLAGS="$ompi_check_mxm_$1_save_LDFLAGS"
    LIBS="$ompi_check_mxm_$1_save_LIBS"

    dnl Run the caller action; an explicit request that cannot be met is fatal.
    AS_IF([test "$ompi_check_mxm_happy" = "yes"],
          [$2],
          [AS_IF([test -n "$with_mxm" && test "$with_mxm" != "no"],
                 [AC_MSG_ERROR([MXM support requested but not found.  Aborting])])
           $3])
])
|
||||
|
49
ompi/mca/mtl/mxm/Makefile.am
Обычный файл
49
ompi/mca/mtl/mxm/Makefile.am
Обычный файл
@ -0,0 +1,49 @@
|
||||
#
|
||||
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Preprocessor flags come from the mtl_mxm_CPPFLAGS substitution.
AM_CPPFLAGS = $(mtl_mxm_CPPFLAGS)

# Help messages installed next to the other package data.
dist_pkgdata_DATA = help-mtl-mxm.txt

# Sources and headers that make up the component.
mtl_mxm_sources = \
    mtl_mxm.c \
    mtl_mxm.h \
    mtl_mxm_cancel.c \
    mtl_mxm_component.c \
    mtl_mxm_endpoint.c \
    mtl_mxm_endpoint.h \
    mtl_mxm_probe.c \
    mtl_mxm_recv.c \
    mtl_mxm_request.h \
    mtl_mxm_send.c \
    mtl_mxm_debug.h \
    mtl_mxm_types.h

# The library is built in this directory and is named
# mca_<type>_<name>.la for DSO builds or libmca_<type>_<name>.la
# for static builds.

if MCA_BUILD_ompi_mtl_mxm_DSO
component_noinst =
component_install = mca_mtl_mxm.la
else
component_noinst = libmca_mtl_mxm.la
component_install =
endif

# DSO flavour, installed into the package library directory.
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_mtl_mxm_la_SOURCES = $(mtl_mxm_sources)
mca_mtl_mxm_la_LIBADD = $(mtl_mxm_LIBS)
mca_mtl_mxm_la_LDFLAGS = -module -avoid-version $(mtl_mxm_LDFLAGS)

# Static flavour, not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_mtl_mxm_la_SOURCES = $(mtl_mxm_sources)
libmca_mtl_mxm_la_LIBADD = $(mtl_mxm_LIBS)
libmca_mtl_mxm_la_LDFLAGS = -module -avoid-version $(mtl_mxm_LDFLAGS)
|
32
ompi/mca/mtl/mxm/configure.m4
Обычный файл
32
ompi/mca/mtl/mxm/configure.m4
Обычный файл
@ -0,0 +1,32 @@
|
||||
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2011.  ALL RIGHTS RESERVED.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
|
||||
|
||||
# MCA_ompi_mtl_mxm_CONFIG([action-if-can-compile],
|
||||
# [action-if-cant-compile])
|
||||
# ------------------------------------------------
|
||||
AC_DEFUN([MCA_ompi_mtl_mxm_CONFIG],[
    AC_CONFIG_FILES([ompi/mca/mtl/mxm/Makefile])

    dnl Probe for MXM and record the verdict in mtl_mxm_happy.
    OMPI_CHECK_MXM([mtl_mxm],
                   [mtl_mxm_happy="yes"],
                   [mtl_mxm_happy="no"])

    dnl On success copy the link flags into the WRAPPER_EXTRA variables
    dnl and run the first caller action, otherwise run the second one.
    AS_IF([test "$mtl_mxm_happy" = "yes"],
          [mtl_mxm_WRAPPER_EXTRA_LDFLAGS="$mtl_mxm_LDFLAGS"
           mtl_mxm_WRAPPER_EXTRA_LIBS="$mtl_mxm_LIBS"
           $1],
          [$2])

    # substitute in the things needed to build mxm
    AC_SUBST([mtl_mxm_CFLAGS])
    AC_SUBST([mtl_mxm_CPPFLAGS])
    AC_SUBST([mtl_mxm_LDFLAGS])
    AC_SUBST([mtl_mxm_LIBS])
])dnl
|
||||
|
67
ompi/mca/mtl/mxm/help-mtl-mxm.txt
Обычный файл
67
ompi/mca/mtl/mxm/help-mtl-mxm.txt
Обычный файл
@ -0,0 +1,67 @@
|
||||
#
|
||||
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
|
||||
[no uuid present]
|
||||
Error obtaining unique transport key from ORTE (orte_precondition_transports %s
|
||||
the environment).
|
||||
|
||||
Local host: %s
|
||||
|
||||
[unable to create endpoint]
|
||||
MXM was unable to create an endpoint. Please make sure that the network link is
|
||||
active on the node and the hardware is functioning.
|
||||
|
||||
Error: %s
|
||||
|
||||
[unable to extract endpoint ib address]
|
||||
MXM was unable to read IB settings for endpoint
|
||||
|
||||
Error: %s
|
||||
|
||||
[unable to extract endpoint local address]
|
||||
MXM was unable to read shmem settings for endpoint
|
||||
|
||||
Error: %s
|
||||
|
||||
[mxm mq create]
|
||||
Failed to create MQ for endpoint
|
||||
|
||||
Error: %s
|
||||
|
||||
[errors during mxm_progress]
|
||||
|
||||
Error %s occurred in attempting to make network progress (mxm_progress).
|
||||
|
||||
|
||||
[mxm init]
|
||||
Initialization of MXM library failed.
|
||||
|
||||
Error: %s
|
||||
|
||||
[error polling network]
|
||||
Error %s occurred in attempting to make network progress (mxm_mq_ipeek).
|
||||
|
||||
[error posting receive]
|
||||
Unable to post application receive buffer
|
||||
|
||||
Error: %s
|
||||
Buffer: %p
|
||||
Length: %d
|
||||
|
||||
[error posting send]
|
||||
Unable to post application send buffer
|
||||
|
||||
Error: %s
|
||||
|
||||
[error while waiting in send]
|
||||
An error occurred while waiting in send
|
||||
|
||||
Error: %s
|
||||
|
274
ompi/mca/mtl/mxm/mtl_mxm.c
Обычный файл
274
ompi/mca/mtl/mxm/mtl_mxm.c
Обычный файл
@ -0,0 +1,274 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "ompi/mca/mtl/mtl.h"
|
||||
#include "ompi/runtime/ompi_module_exchange.h"
|
||||
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_types.h"
|
||||
#include "mtl_mxm_endpoint.h"
|
||||
#include "mtl_mxm_request.h"
|
||||
#include "mtl_mxm_debug.h"
|
||||
|
||||
mca_mtl_mxm_module_t ompi_mtl_mxm = {
|
||||
{
|
||||
0, /* max context id */
|
||||
0, /* max tag value */
|
||||
0, /* request reserve space */
|
||||
0, /* flags */
|
||||
ompi_mtl_mxm_add_procs,
|
||||
ompi_mtl_mxm_del_procs,
|
||||
ompi_mtl_mxm_finalize,
|
||||
ompi_mtl_mxm_send,
|
||||
ompi_mtl_mxm_isend,
|
||||
ompi_mtl_mxm_irecv,
|
||||
ompi_mtl_mxm_iprobe,
|
||||
ompi_mtl_mxm_cancel,
|
||||
ompi_mtl_mxm_add_comm,
|
||||
ompi_mtl_mxm_del_comm
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static uint32_t ompi_mtl_mxm_get_job_id(void)
|
||||
{
|
||||
uint8_t unique_job_key[16];
|
||||
uint32_t job_key;
|
||||
unsigned long long *uu;
|
||||
char *generated_key;
|
||||
uint16_t *jkp;
|
||||
|
||||
jkp = (uint16_t *) unique_job_key;
|
||||
uu = (unsigned long long *) unique_job_key;
|
||||
|
||||
generated_key = getenv("OMPI_MCA_orte_precondition_transports");
|
||||
memset(uu, 0, sizeof(unique_job_key));
|
||||
|
||||
if (!generated_key || (strlen(generated_key) != 33) || sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2) {
|
||||
orte_show_help("help-mtl-mxm.txt", "no uuid present", true,
|
||||
generated_key ? "could not be parsed from" :
|
||||
"not present in", orte_process_info.nodename);
|
||||
return 0;
|
||||
}
|
||||
|
||||
job_key = ((jkp[2] ^ jkp[3]) >> 8) | ((jkp[0] ^ jkp[1]) << 8);
|
||||
job_key ^= ((jkp[6] ^ jkp[7]) >> 8) | ((jkp[4] ^ jkp[5]) << 8);
|
||||
job_key &= ~0xff;
|
||||
return job_key;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_progress(void);
|
||||
|
||||
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm_ptl_id_t ptlid)
|
||||
{
|
||||
size_t addrlen;
|
||||
mxm_error_t err;
|
||||
|
||||
addrlen = sizeof(ep_info->ptl_addr[ptlid]);
|
||||
err = mxm_ep_address(ompi_mtl_mxm.ep, ptlid,
|
||||
(struct sockaddr *) &ep_info->ptl_addr[ptlid], &addrlen);
|
||||
if (MXM_OK != err) {
|
||||
orte_show_help("help-mtl-mxm.txt", "unable to extract endpoint address",
|
||||
true, mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int ompi_mtl_mxm_module_init(void)
|
||||
{
|
||||
struct sockaddr_mxm_local_proc sa_bind_self;
|
||||
struct sockaddr_mxm_ib_local sa_bind_rdma;
|
||||
mxm_ep_opts_t ep_opt;
|
||||
ompi_mtl_mxm_ep_conn_info_t ep_info;
|
||||
mxm_error_t err;
|
||||
uint32_t jobid;
|
||||
|
||||
jobid = ompi_mtl_mxm_get_job_id();
|
||||
if (0 == jobid) {
|
||||
MXM_ERROR("Failed to generate jobid");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* Setup the endpoint options and local addresses to bind to. */
|
||||
mxm_fill_ep_opts(&ep_opt);
|
||||
|
||||
sa_bind_self.sa_family = AF_MXM_LOCAL_PROC;
|
||||
sa_bind_self.context_id = jobid;
|
||||
sa_bind_self.process_id = getpid();
|
||||
|
||||
sa_bind_rdma.sa_family = AF_MXM_IB_LOCAL;
|
||||
sa_bind_rdma.lid = 0;
|
||||
sa_bind_rdma.pkey = 0;
|
||||
sa_bind_rdma.qp_num = 0;
|
||||
sa_bind_rdma.sl = 0;
|
||||
|
||||
ep_opt.ptl_bind_addr[MXM_PTL_SELF] = (struct sockaddr*)&sa_bind_self;
|
||||
ep_opt.ptl_bind_addr[MXM_PTL_RDMA] = (struct sockaddr*)&sa_bind_rdma;
|
||||
|
||||
/* Open MXM endpoint */
|
||||
err = mxm_ep_create(ompi_mtl_mxm.mxm_context, &ep_opt, &ompi_mtl_mxm.ep);
|
||||
if (MXM_OK != err) {
|
||||
orte_show_help("help-mtl-mxm.txt", "unable to create endpoint", true,
|
||||
mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get address for each PTL on this endpoint, and share it with other ranks.
|
||||
*/
|
||||
if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SELF)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_RDMA)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (OMPI_SUCCESS != ompi_modex_send(&mca_mtl_mxm_component.super.mtl_version,
|
||||
&ep_info, sizeof(ep_info))) {
|
||||
MXM_ERROR("Open MPI couldn't distribute EP connection details");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* Register the MXM progress function */
|
||||
opal_progress_register(ompi_mtl_mxm_progress);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_finalize(struct mca_mtl_base_module_t* mtl)
|
||||
{
|
||||
opal_progress_unregister(ompi_mtl_mxm_progress);
|
||||
mxm_ep_destroy(ompi_mtl_mxm.ep);
|
||||
mxm_cleanup(ompi_mtl_mxm.mxm_context);
|
||||
ompi_mtl_mxm.mxm_context = NULL;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
|
||||
struct ompi_proc_t** procs, /*const*/
|
||||
struct mca_mtl_base_endpoint_t **mtl_peer_data)
|
||||
{
|
||||
ompi_mtl_mxm_ep_conn_info_t **ep_info;
|
||||
mxm_conn_req_t *conn_reqs;
|
||||
mxm_error_t err;
|
||||
size_t size;
|
||||
size_t i;
|
||||
int rc;
|
||||
|
||||
assert(mtl == &ompi_mtl_mxm.super);
|
||||
|
||||
/* Allocate connection requests */
|
||||
conn_reqs = malloc(nprocs * sizeof *conn_reqs);
|
||||
ep_info = malloc(nprocs * sizeof *ep_info);
|
||||
if (NULL == conn_reqs || NULL == ep_info) {
|
||||
rc = OMPI_ERR_OUT_OF_RESOURCE;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* Get the EP connection requests for all the processes from modex */
|
||||
for (i = 0; i < nprocs; ++i) {
|
||||
rc = ompi_modex_recv(&mca_mtl_mxm_component.super.mtl_version, procs[i],
|
||||
(void**)&ep_info[i], &size);
|
||||
if (rc != OMPI_SUCCESS || size != sizeof(ompi_mtl_mxm_ep_conn_info_t)) {
|
||||
goto bail;
|
||||
}
|
||||
|
||||
conn_reqs[i].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&ep_info[i]->ptl_addr[MXM_PTL_SELF];
|
||||
conn_reqs[i].ptl_addr[MXM_PTL_SHM] = NULL;
|
||||
conn_reqs[i].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&ep_info[i]->ptl_addr[MXM_PTL_RDMA];
|
||||
}
|
||||
|
||||
/* Connect to remote peers */
|
||||
err = mxm_ep_connect(ompi_mtl_mxm.ep, conn_reqs, nprocs, 1000);
|
||||
if (MXM_OK != err) {
|
||||
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
|
||||
for (i = 0; i < nprocs; ++i) {
|
||||
if (MXM_OK != conn_reqs[i].error) {
|
||||
MXM_ERROR("MXM EP connect to %s error: %s\n", procs[i]->proc_hostname,
|
||||
mxm_error_string(conn_reqs[i].error));
|
||||
}
|
||||
}
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* Save returned connections */
|
||||
for (i = 0; i < nprocs; ++i) {
|
||||
mtl_peer_data[i] = (mca_mtl_mxm_endpoint_t *) OBJ_NEW(mca_mtl_mxm_endpoint_t);
|
||||
mtl_peer_data[i]->mtl_mxm_module = &ompi_mtl_mxm;
|
||||
mtl_peer_data[i]->mxm_conn = conn_reqs[i].conn;
|
||||
}
|
||||
|
||||
bail:
|
||||
free(conn_reqs);
|
||||
free(ep_info);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_del_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
|
||||
struct ompi_proc_t** procs,
|
||||
struct mca_mtl_base_endpoint_t **mtl_peer_data)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < nprocs; ++i) {
|
||||
mxm_ep_disconnect(mtl_peer_data[i]->mxm_conn);
|
||||
OBJ_RELEASE(mtl_peer_data[i]);
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_add_comm(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
mxm_error_t err;
|
||||
mxm_mq_h mq;
|
||||
|
||||
assert(mtl == &ompi_mtl_mxm.super);
|
||||
assert(NULL != ompi_mtl_mxm.mxm_context);
|
||||
|
||||
err = mxm_mq_create(ompi_mtl_mxm.mxm_context, comm->c_contextid, &mq);
|
||||
if (MXM_OK != err) {
|
||||
orte_show_help("help-mtl-mxm.txt", "mxm mq create", true, mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
comm->c_pml_comm = (void*)mq;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_del_comm(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
assert(mtl == &ompi_mtl_mxm.super);
|
||||
if (NULL != ompi_mtl_mxm.mxm_context) {
|
||||
mxm_mq_destroy((mxm_mq_h)comm->c_pml_comm);
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_progress(void)
|
||||
{
|
||||
mxm_error_t err;
|
||||
|
||||
err = mxm_progress(ompi_mtl_mxm.mxm_context);
|
||||
if ((MXM_OK != err) && (MXM_ERR_NO_PROGRESS != err) ) {
|
||||
orte_show_help("help-mtl-mxm.txt", "errors during mxm_progress", true, mxm_error_string(err));
|
||||
}
|
||||
return 1;
|
||||
}
|
71
ompi/mca/mtl/mxm/mtl_mxm.h
Обычный файл
71
ompi/mca/mtl/mxm/mtl_mxm.h
Обычный файл
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MTL_MXM_H_HAS_BEEN_INCLUDED
|
||||
#define MTL_MXM_H_HAS_BEEN_INCLUDED
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <mxm/api/mxm_api.h>
|
||||
#include <mxm/api/mxm_addr.h>
|
||||
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/mtl/mtl.h"
|
||||
#include "ompi/mca/mtl/base/base.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* MTL interface functions */
|
||||
extern int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t* mtl,
|
||||
size_t nprocs, struct ompi_proc_t** procs,
|
||||
struct mca_mtl_base_endpoint_t **mtl_peer_data);
|
||||
|
||||
extern int ompi_mtl_mxm_del_procs(struct mca_mtl_base_module_t* mtl,
|
||||
size_t nprocs, struct ompi_proc_t** procs,
|
||||
struct mca_mtl_base_endpoint_t **mtl_peer_data);
|
||||
|
||||
extern int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t* comm, int dest, int tag,
|
||||
struct opal_convertor_t *convertor,
|
||||
mca_pml_base_send_mode_t mode);
|
||||
|
||||
extern int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t* comm, int dest,
|
||||
int tag, struct opal_convertor_t *convertor,
|
||||
mca_pml_base_send_mode_t mode, bool blocking,
|
||||
mca_mtl_request_t * mtl_request);
|
||||
|
||||
extern int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t *comm, int src,
|
||||
int tag, struct opal_convertor_t *convertor,
|
||||
struct mca_mtl_request_t *mtl_request);
|
||||
|
||||
extern int ompi_mtl_mxm_iprobe(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t *comm, int src,
|
||||
int tag, int *flag,
|
||||
struct ompi_status_public_t *status);
|
||||
|
||||
extern int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl,
|
||||
struct mca_mtl_request_t *mtl_request, int flag);
|
||||
|
||||
extern int ompi_mtl_mxm_add_comm(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_communicator_t *comm);
|
||||
|
||||
extern int ompi_mtl_mxm_del_comm(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_communicator_t *comm);
|
||||
|
||||
extern int ompi_mtl_mxm_finalize(struct mca_mtl_base_module_t* mtl);
|
||||
|
||||
int ompi_mtl_mxm_module_init(void);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
36
ompi/mca/mtl/mxm/mtl_mxm_cancel.c
Обычный файл
36
ompi/mca/mtl/mxm/mtl_mxm_cancel.c
Обычный файл
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_request.h"
|
||||
|
||||
int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl,
|
||||
struct mca_mtl_request_t *mtl_request, int flag)
|
||||
{
|
||||
|
||||
mxm_error_t err;
|
||||
mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
|
||||
|
||||
err = mxm_req_cancel(&mtl_mxm_request->mxm_request);
|
||||
if (MXM_OK == err) {
|
||||
err = mxm_req_test(&mtl_mxm_request->mxm_request);
|
||||
if (MXM_OK == err) {
|
||||
mtl_request->ompi_req->req_status._cancelled = true;
|
||||
mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
|
||||
return OMPI_SUCCESS;
|
||||
} else {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
} else if (MXM_ERR_NO_MESSAGE == err) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
return OMPI_ERROR;
|
||||
}
|
122
ompi/mca/mtl/mxm/mtl_mxm_component.c
Обычный файл
122
ompi/mca/mtl/mxm/mtl_mxm_component.c
Обычный файл
@ -0,0 +1,122 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_types.h"
|
||||
#include "mtl_mxm_request.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
static int ompi_mtl_mxm_component_open(void);
|
||||
static int ompi_mtl_mxm_component_close(void);
|
||||
static int ompi_mtl_mxm_component_register(void);
|
||||
|
||||
int mca_mtl_mxm_output = -1;
|
||||
|
||||
|
||||
static mca_mtl_base_module_t
|
||||
* ompi_mtl_mxm_component_init(bool enable_progress_threads,
|
||||
bool enable_mpi_threads);
|
||||
|
||||
mca_mtl_mxm_component_t mca_mtl_mxm_component = {
|
||||
{
|
||||
/*
|
||||
* First, the mca_base_component_t struct containing meta
|
||||
* information about the component itself
|
||||
*/
|
||||
{
|
||||
MCA_MTL_BASE_VERSION_2_0_0,
|
||||
"mxm", /* MCA component name */
|
||||
OMPI_MAJOR_VERSION, /* MCA component major version */
|
||||
OMPI_MINOR_VERSION, /* MCA component minor version */
|
||||
OMPI_RELEASE_VERSION, /* MCA component release version */
|
||||
ompi_mtl_mxm_component_open, /* component open */
|
||||
ompi_mtl_mxm_component_close, /* component close */
|
||||
NULL,
|
||||
ompi_mtl_mxm_component_register
|
||||
},
|
||||
{
|
||||
/* The component is not checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_NONE
|
||||
},
|
||||
ompi_mtl_mxm_component_init /* component init */
|
||||
}
|
||||
};
|
||||
|
||||
static int ompi_mtl_mxm_component_register(void)
|
||||
{
|
||||
|
||||
mca_base_param_reg_int(&mca_mtl_mxm_component.super.mtl_version, "verbose",
|
||||
"Verbose level of the MXM component",
|
||||
false, false,
|
||||
0,
|
||||
&ompi_mtl_mxm.verbose);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int ompi_mtl_mxm_component_open(void)
|
||||
{
|
||||
struct stat st;
|
||||
|
||||
/* Component available only if IB hardware is present */
|
||||
if (0 == stat("/dev/infiniband/uverbs0", &st)) {
|
||||
return OMPI_SUCCESS;
|
||||
} else {
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
}
|
||||
|
||||
static int ompi_mtl_mxm_component_close(void)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static mca_mtl_base_module_t*
|
||||
ompi_mtl_mxm_component_init(bool enable_progress_threads,
|
||||
bool enable_mpi_threads)
|
||||
{
|
||||
mxm_context_opts_t mxm_opts;
|
||||
mxm_error_t err;
|
||||
int rc;
|
||||
|
||||
mca_mtl_mxm_output = opal_output_open(NULL);
|
||||
opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose);
|
||||
|
||||
mxm_fill_context_opts(&mxm_opts);
|
||||
err = mxm_init(&mxm_opts, &ompi_mtl_mxm.mxm_context);
|
||||
if (MXM_OK != err) {
|
||||
orte_show_help("help-mtl-mxm.txt", "mxm init", true,
|
||||
mxm_error_string(err));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rc = ompi_mtl_mxm_module_init();
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Calculate MTL constraints according to MXM types */
|
||||
ompi_mtl_mxm.super.mtl_max_contextid = 1UL << (sizeof(mxm_ctxid_t) * 8);
|
||||
ompi_mtl_mxm.super.mtl_max_tag = 1UL << (sizeof(mxm_tag_t) * 8 - 2);
|
||||
ompi_mtl_mxm.super.mtl_request_size =
|
||||
sizeof(mca_mtl_mxm_request_t) - sizeof(struct mca_mtl_request_t);
|
||||
return &ompi_mtl_mxm.super;
|
||||
}
|
||||
|
34
ompi/mca/mtl/mxm/mtl_mxm_debug.h
Обычный файл
34
ompi/mca/mtl/mxm/mtl_mxm_debug.h
Обычный файл
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MTL_MXM_DEBUG_H
|
||||
#define MTL_MXM_DEBUG_H
|
||||
#pragma GCC system_header
|
||||
|
||||
/* File name used in log prefixes: __BASE_FILE__ when defined, else __FILE__. */
#ifdef __BASE_FILE__
#define __MXM_FILE__ __BASE_FILE__
#else
#define __MXM_FILE__ __FILE__
#endif

/* Log at the given verbosity level, prefixed with file, line and function. */
#define MXM_VERBOSE(level, format, ...) \
    opal_output_verbose(level, mca_mtl_mxm_output, "%s:%d - %s() " format, \
                        __MXM_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)

/* Log at verbosity level 0 with an additional "Error: " prefix. */
#define MXM_ERROR(format, ... ) \
    opal_output_verbose(0, mca_mtl_mxm_output, "Error: %s:%d - %s() " format, \
                        __MXM_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)

/* MXM_VERBOSE variant that also prints the rank member of mxm_module. */
#define MXM_MODULE_VERBOSE(mxm_module, level, format, ...) \
    MXM_VERBOSE(level, "[%d] " format, (mxm_module)->rank, ## __VA_ARGS__)
|
||||
|
||||
extern int mca_mtl_mxm_output;
|
||||
|
||||
#endif
|
42
ompi/mca/mtl/mxm/mtl_mxm_endpoint.c
Обычный файл
42
ompi/mca/mtl/mxm/mtl_mxm_endpoint.c
Обычный файл
@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
#include "ompi/types.h"
|
||||
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_types.h"
|
||||
#include "mtl_mxm_endpoint.h"
|
||||
|
||||
/*
|
||||
* Initialize state of the endpoint instance.
|
||||
*
|
||||
*/
|
||||
|
||||
static void mca_mtl_mxm_endpoint_construct(mca_mtl_mxm_endpoint_t* endpoint)
|
||||
{
|
||||
endpoint->mtl_mxm_module = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Destroy a endpoint
|
||||
*
|
||||
*/
|
||||
|
||||
static void mca_mtl_mxm_endpoint_destruct(mca_mtl_mxm_endpoint_t* endpoint)
|
||||
{
|
||||
}
|
||||
|
||||
/* Class instance: an endpoint is a list item with the hooks defined above. */
OBJ_CLASS_INSTANCE(mca_mtl_mxm_endpoint_t,
                   opal_list_item_t,
                   mca_mtl_mxm_endpoint_construct,
                   mca_mtl_mxm_endpoint_destruct);
|
43
ompi/mca/mtl/mxm/mtl_mxm_endpoint.h
Обычный файл
43
ompi/mca/mtl/mxm/mtl_mxm_endpoint.h
Обычный файл
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_MTL_MXM_ENDPOINT_H
|
||||
#define MCA_MTL_MXM_ENDPOINT_H
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "ompi/mca/mtl/mtl.h"
|
||||
#include "mtl_mxm.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_mtl_mxm_endpoint_t);
|
||||
|
||||
/**
|
||||
* An abstraction that represents a connection to a endpoint process.
|
||||
* An instance of mca_mtl_base_endpoint_t is associated w/ each process
|
||||
* and MTL pair at startup. However, connections to the endpoint
|
||||
* are established dynamically on an as-needed basis:
|
||||
*/
|
||||
|
||||
struct mca_mtl_base_endpoint_t {
|
||||
opal_list_item_t super;
|
||||
|
||||
struct mca_mtl_mxm_module_t* mtl_mxm_module;
|
||||
/**< MTL instance that created this connection */
|
||||
|
||||
mxm_conn_h mxm_conn;
|
||||
/**< MXM Connection handle*/
|
||||
};
|
||||
|
||||
typedef struct mca_mtl_base_endpoint_t mca_mtl_base_endpoint_t;
|
||||
typedef mca_mtl_base_endpoint_t mca_mtl_mxm_endpoint_t;
|
||||
OBJ_CLASS_DECLARATION(mca_mtl_mxm_endpoint);
|
||||
|
||||
END_C_DECLS
|
||||
#endif
|
45
ompi/mca/mtl/mxm/mtl_mxm_probe.c
Обычный файл
45
ompi/mca/mtl/mxm/mtl_mxm_probe.c
Обычный файл
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_types.h"
|
||||
|
||||
#include "ompi/communicator/communicator.h"
|
||||
|
||||
int ompi_mtl_mxm_iprobe(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t *comm, int src, int tag,
|
||||
int *flag, struct ompi_status_public_t *status)
|
||||
{
|
||||
mxm_error_t err;
|
||||
mxm_req_t req;
|
||||
|
||||
req.state = MXM_REQ_NEW;
|
||||
req.mq = (mxm_mq_h)comm->c_pml_comm;
|
||||
req.tag = tag;
|
||||
req.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
|
||||
req.conn = (src == MPI_ANY_SOURCE) ? NULL : ompi_mtl_mxm_conn_lookup(comm, src);
|
||||
|
||||
err = mxm_req_probe(&req);
|
||||
if (MXM_OK == err) {
|
||||
*flag = 1;
|
||||
if (MPI_STATUS_IGNORE != status) {
|
||||
status->MPI_SOURCE = *(int *)mxm_conn_get_context(req.conn);
|
||||
status->MPI_TAG = req.completion.sender_tag;
|
||||
status->MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req.completion.status);
|
||||
status->_ucount = req.completion.actual_len;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
} else if (MXM_ERR_NO_MESSAGE == err) {
|
||||
*flag = 0;
|
||||
return OMPI_SUCCESS;
|
||||
} else {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
}
|
90
ompi/mca/mtl/mxm/mtl_mxm_recv.c
Обычный файл
90
ompi/mca/mtl/mxm/mtl_mxm_recv.c
Обычный файл
@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_types.h"
|
||||
#include "mtl_mxm_request.h"
|
||||
|
||||
|
||||
/*
 * Completion callback of a receive request.
 *
 * Copies tag, source, error code and length from the MXM completion into
 * the request status, unpacks the received bytes through the convertor,
 * frees the receive buffer when free_after is set, and finally invokes
 * the MTL completion callback.
 */
static void ompi_mtl_mxm_recv_completion_cb(mxm_req_t *req)
{
    mca_mtl_mxm_request_t *recv_req = (mca_mtl_mxm_request_t *) req->context;
    struct ompi_request_t *upper_req = recv_req->super.ompi_req;

    /* Set completion status and envelope */
    upper_req->req_status.MPI_TAG = req->completion.sender_tag;
    upper_req->req_status.MPI_SOURCE = req->completion.sender_imm;
    upper_req->req_status.MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req->completion.status);
    upper_req->req_status._ucount = req->completion.actual_len;

    /* Copy data */
    ompi_mtl_datatype_unpack(recv_req->convertor, recv_req->buf,
                             req->completion.actual_len);

    /* Release the buffer when the request was set up to free it. */
    if (recv_req->free_after) {
        free(recv_req->buf);
    }

    recv_req->super.completion_callback(&recv_req->super);
}
|
||||
|
||||
|
||||
int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t *comm, int src, int tag,
|
||||
struct opal_convertor_t *convertor,
|
||||
struct mca_mtl_request_t *mtl_request)
|
||||
{
|
||||
mca_mtl_mxm_request_t * mtl_mxm_request;
|
||||
mca_mtl_mxm_endpoint_t* mxm_endpoint;
|
||||
ompi_proc_t* ompi_proc;
|
||||
mxm_error_t err;
|
||||
int ret;
|
||||
|
||||
mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
|
||||
|
||||
mtl_mxm_request->convertor = convertor;
|
||||
ret = ompi_mtl_datatype_recv_buf(mtl_mxm_request->convertor,
|
||||
&mtl_mxm_request->buf,
|
||||
&mtl_mxm_request->length,
|
||||
&mtl_mxm_request->free_after);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* prepare a receive request embedded in the MTL request */
|
||||
mtl_mxm_request->mxm_request.state = MXM_REQ_NEW;
|
||||
mtl_mxm_request->mxm_request.mq = (mxm_mq_h)comm->c_pml_comm;
|
||||
mtl_mxm_request->mxm_request.tag = tag;
|
||||
mtl_mxm_request->mxm_request.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
|
||||
mtl_mxm_request->mxm_request.conn = (src == MPI_ANY_SOURCE) ? NULL :
|
||||
ompi_mtl_mxm_conn_lookup(comm, src);
|
||||
|
||||
mtl_mxm_request->mxm_request.data.buf.ptr = mtl_mxm_request->buf;
|
||||
mtl_mxm_request->mxm_request.data.buf.len = mtl_mxm_request->length;
|
||||
mtl_mxm_request->mxm_request.completed_cb = ompi_mtl_mxm_recv_completion_cb;
|
||||
mtl_mxm_request->mxm_request.context = mtl_mxm_request;
|
||||
mtl_mxm_request->mxm_request.flags = MXM_REQ_FLAG_NONBLOCK;
|
||||
|
||||
/* post-recv */
|
||||
err = mxm_req_recv(&mtl_mxm_request->mxm_request);
|
||||
if (MXM_OK != err) {
|
||||
orte_show_help("help-mtl-mxm.txt", "error posting receive", true,
|
||||
mxm_error_string(err), mtl_mxm_request->buf, mtl_mxm_request->length);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
28
ompi/mca/mtl/mxm/mtl_mxm_request.h
Обычный файл
28
ompi/mca/mtl/mxm/mtl_mxm_request.h
Обычный файл
@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef OMPI_MTL_MXM_REQUEST_H
|
||||
#define OMPI_MTL_MXM_REQUEST_H
|
||||
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "mtl_mxm.h"
|
||||
|
||||
|
||||
struct mca_mtl_mxm_request_t {
|
||||
struct mca_mtl_request_t super;
|
||||
mxm_req_t mxm_request;
|
||||
/* mxm_segment_t mxm_segment[1]; */
|
||||
void *buf;
|
||||
size_t length;
|
||||
struct opal_convertor_t *convertor;
|
||||
bool free_after;
|
||||
};
|
||||
typedef struct mca_mtl_mxm_request_t mca_mtl_mxm_request_t;
|
||||
|
||||
#endif
|
136
ompi/mca/mtl/mxm/mtl_mxm_send.c
Обычный файл
136
ompi/mca/mtl/mxm/mtl_mxm_send.c
Обычный файл
@ -0,0 +1,136 @@
|
||||
/*
 * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_types.h"
|
||||
#include "mtl_mxm_request.h"
|
||||
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
|
||||
|
||||
|
||||
/*
 * MXM completion callback for sends posted by ompi_mtl_mxm_isend().
 *
 * Releases the packed buffer when the request owns it, translates the MXM
 * completion status into an MPI error code on the OMPI request, and then
 * notifies the upper layer through the request's completion callback.
 */
static void ompi_mtl_mxm_send_completion_cb(mxm_req_t *req)
{
    mca_mtl_mxm_request_t *send_req = (mca_mtl_mxm_request_t *) req->context;
    int mpi_err;

    if (send_req->free_after) {
        free(send_req->buf);
    }

    /* Map the MXM completion status onto the MPI error space */
    if (MXM_OK == req->completion.status) {
        mpi_err = OMPI_SUCCESS;
    } else if (MXM_ERR_MESSAGE_TRUNCATED == req->completion.status) {
        mpi_err = MPI_ERR_TRUNCATE;
    } else {
        mpi_err = MPI_ERR_INTERN;
    }
    send_req->super.ompi_req->req_status.MPI_ERROR = mpi_err;

    send_req->super.completion_callback(&send_req->super);
}
|
||||
|
||||
int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t* comm, int dest, int tag,
|
||||
struct opal_convertor_t *convertor,
|
||||
mca_pml_base_send_mode_t mode)
|
||||
{
|
||||
mxm_req_t mxm_req;
|
||||
bool free_after;
|
||||
mxm_error_t err;
|
||||
int ret;
|
||||
|
||||
/* prepare local send request */
|
||||
mxm_req.state = MXM_REQ_NEW;
|
||||
mxm_req.mq = ompi_mtl_mxm_mq_lookup(comm);
|
||||
mxm_req.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
|
||||
mxm_req.tag = tag;
|
||||
mxm_req.imm_data = ompi_comm_rank(comm);
|
||||
mxm_req.completed_cb = NULL;
|
||||
mxm_req.flags = 0;
|
||||
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
|
||||
mxm_req.flags |= MXM_REQ_FLAG_SEND_SYNC;
|
||||
}
|
||||
ret = ompi_mtl_datatype_pack(convertor, &mxm_req.data.buf.ptr, &mxm_req.data.buf.len,
|
||||
&free_after);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* post-send */
|
||||
err = mxm_req_send(&mxm_req);
|
||||
if (MXM_OK != err) {
|
||||
orte_show_help("help-mtl-mxm.txt", "error posting send", true, 0, mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* wait for request completion */
|
||||
err = mxm_req_wait(&mxm_req);
|
||||
if (MXM_OK != err) {
|
||||
orte_show_help("help-mtl-mxm.txt", "error while waiting in send", true, mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t* comm, int dest, int tag,
|
||||
struct opal_convertor_t *convertor,
|
||||
mca_pml_base_send_mode_t mode, bool blocking,
|
||||
mca_mtl_request_t * mtl_request)
|
||||
{
|
||||
mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *)mtl_request;
|
||||
mxm_error_t err;
|
||||
int ret;
|
||||
|
||||
assert(mtl == &ompi_mtl_mxm.super);
|
||||
|
||||
mtl_mxm_request->convertor = convertor;
|
||||
ret = ompi_mtl_datatype_pack(mtl_mxm_request->convertor,
|
||||
&mtl_mxm_request->buf,
|
||||
&mtl_mxm_request->length,
|
||||
&mtl_mxm_request->free_after);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* prepare a send request embedded in the MTL request */
|
||||
mtl_mxm_request->mxm_request.state = MXM_REQ_NEW;
|
||||
mtl_mxm_request->mxm_request.mq = ompi_mtl_mxm_mq_lookup(comm);
|
||||
mtl_mxm_request->mxm_request.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
|
||||
mtl_mxm_request->mxm_request.tag = tag;
|
||||
mtl_mxm_request->mxm_request.imm_data = ompi_comm_rank(comm);
|
||||
mtl_mxm_request->mxm_request.data.buf.ptr = mtl_mxm_request->buf;
|
||||
mtl_mxm_request->mxm_request.data.buf.len = mtl_mxm_request->length;
|
||||
mtl_mxm_request->mxm_request.completed_cb = ompi_mtl_mxm_send_completion_cb;
|
||||
mtl_mxm_request->mxm_request.context = mtl_mxm_request;
|
||||
mtl_mxm_request->mxm_request.flags = MXM_REQ_FLAG_NONBLOCK;
|
||||
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
|
||||
mtl_mxm_request->mxm_request.flags |= MXM_REQ_FLAG_SEND_SYNC;
|
||||
}
|
||||
|
||||
/* post-send */
|
||||
err = mxm_req_send(&mtl_mxm_request->mxm_request);
|
||||
if (MXM_OK != err) {
|
||||
orte_show_help("help-mtl-mxm.txt", "error posting send", true, 1, mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
73
ompi/mca/mtl/mxm/mtl_mxm_types.h
Обычный файл
73
ompi/mca/mtl/mxm/mtl_mxm_types.h
Обычный файл
@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MTL_MXM_TYPES_H_HAS_BEEN_INCLUDED
|
||||
#define MTL_MXM_TYPES_H_HAS_BEEN_INCLUDED
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "mtl_mxm.h"
|
||||
|
||||
#include "ompi/mca/mtl/mtl.h"
|
||||
#include "ompi/mca/mtl/base/base.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "mtl_mxm_endpoint.h"
|
||||
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
|
||||
* MTL Module Interface
|
||||
*/
|
||||
typedef struct mca_mtl_mxm_module_t {
|
||||
mca_mtl_base_module_t super; /**< base MTL interface */
|
||||
int verbose;
|
||||
mxm_h mxm_context;
|
||||
mxm_ep_h ep;
|
||||
} mca_mtl_mxm_module_t;
|
||||
|
||||
|
||||
typedef struct ompi_mtl_mxm_ep_conn_info_t {
|
||||
struct sockaddr_storage ptl_addr[MXM_PTL_LAST];
|
||||
} ompi_mtl_mxm_ep_conn_info_t;
|
||||
|
||||
extern mca_mtl_mxm_module_t ompi_mtl_mxm;
|
||||
|
||||
typedef struct mca_mtl_mxm_component_t {
|
||||
mca_mtl_base_component_2_0_0_t super; /**< base MTL component */
|
||||
} mca_mtl_mxm_component_t;
|
||||
|
||||
|
||||
OMPI_DECLSPEC mca_mtl_mxm_component_t mca_mtl_mxm_component;
|
||||
|
||||
|
||||
static inline mxm_conn_h ompi_mtl_mxm_conn_lookup(struct ompi_communicator_t* comm, int rank) {
|
||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup(comm, rank);
|
||||
mca_mtl_mxm_endpoint_t *endpoint = (mca_mtl_mxm_endpoint_t*) ompi_proc->proc_pml;
|
||||
|
||||
return endpoint->mxm_conn;
|
||||
}
|
||||
|
||||
static inline mxm_mq_h ompi_mtl_mxm_mq_lookup(struct ompi_communicator_t* comm) {
|
||||
return (mxm_mq_h)comm->c_pml_comm;
|
||||
}
|
||||
|
||||
/**
 * Translate an MXM status into an MPI error code: MXM_OK becomes
 * OMPI_SUCCESS, MXM_ERR_MESSAGE_TRUNCATED becomes MPI_ERR_TRUNCATE and
 * every other value becomes MPI_ERR_INTERN.
 */
static inline int ompi_mtl_mxm_to_mpi_status(mxm_error_t status) {
    switch (status) {
    case MXM_OK:
        return OMPI_SUCCESS;
    case MXM_ERR_MESSAGE_TRUNCATED:
        return MPI_ERR_TRUNCATE;
    default:
        return MPI_ERR_INTERN;
    }
}
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user