1
1

initial implementation of MXM MTL layer

This commit was SVN r24946.
Этот коммит содержится в:
Mike Dubman 2011-07-26 04:36:21 +00:00
родитель 715f871605
Коммит aefffa073d
17 изменённых файлов: 1215 добавлений и 1 удалений

17
README
Просмотреть файл

@ -140,6 +140,7 @@ Detailed Open MPI v1.5 Feature List:
process)
- Many other small improvements and bug fixes, too numerous to
list here
- Mellanox MXM MTL layer implementation
Known issues
------------
@ -493,10 +494,11 @@ Network Support
performance:
- Myrinet MX (including Open-MX, but not GM)
- InfiniPath PSM
- Mellanox MXM
- Portals
Open MPI will, by default, choose to use "cm" when the InfiniPath
PSM MTL can be used. Otherwise, "ob1" will be used and the
PSM or Mellanox MXM MTL can be used. Otherwise, "ob1" will be used and the
corresponding BTLs will be selected. "csum" will never be selected
by default. Users can force the use of ob1 or cm if desired by
setting the "pml" MCA parameter at run-time:
@ -702,6 +704,19 @@ for a full list); a summary of the more commonly used ones follows:
look in <psm directory>/lib and <psm directory>/lib64, which covers
most cases. This option is only needed for special configurations.
--with-mxm=<directory>
Specify the directory where the Mellanox MXM library and
header files are located. This option is generally only necessary
if the MXM headers and libraries are not in default
compiler/linker search paths.
MXM is the support library for Mellanox network adapters.
--with-mxm-libdir=<directory>
Look in directory for the MXM libraries. By default, Open MPI will
look in <mxm directory>/lib and <mxm directory>/lib64, which covers
most cases. This option is only needed for special configurations.
--with-sctp=<directory>
Specify the directory where the SCTP libraries and header files are
located. This option is generally only necessary if the SCTP headers

57
ompi/config/ompi_check_mxm.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,57 @@
# -*- shell-script -*-
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# OMPI_CHECK_MXM(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
# check if MXM support can be found. sets prefix_{CPPFLAGS,
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
# support, otherwise executes action-if-not-found
#
# Recognized options: --with-mxm and --with-mxm-libdir.
# The probe looks for the header mxm/api/mxm_api.h and for the symbol
# mxm_cleanup in the mxm library.  CPPFLAGS, LDFLAGS and LIBS of the
# caller are saved before the probe and restored afterwards.
# When --with-mxm was given with a value other than "no" and the probe
# fails, configure aborts instead of running action-if-not-found.
AC_DEFUN([OMPI_CHECK_MXM],[
AC_ARG_WITH([mxm],
[AC_HELP_STRING([--with-mxm(=DIR)],
[Build Mellanox Messaging support])])
OMPI_CHECK_WITHDIR([mxm], [$with_mxm], [include/mxm/api/mxm_api.h])
AC_ARG_WITH([mxm-libdir],
[AC_HELP_STRING([--with-mxm-libdir=DIR],
[Search for Mellanox Messaging libraries in DIR])])
OMPI_CHECK_WITHDIR([mxm-libdir], [$with_mxm_libdir], [libmxm.*])
# Save the global flags so the probe below cannot leak into them
ompi_check_mxm_$1_save_CPPFLAGS="$CPPFLAGS"
ompi_check_mxm_$1_save_LDFLAGS="$LDFLAGS"
ompi_check_mxm_$1_save_LIBS="$LIBS"
AS_IF([test "$with_mxm" != "no"],
[AS_IF([test ! -z "$with_mxm" -a "$with_mxm" != "yes"],
[ompi_check_mxm_dir="$with_mxm"])
AS_IF([test ! -z "$with_mxm_libdir" -a "$with_mxm_libdir" != "yes"],
[ompi_check_mxm_libdir="$with_mxm_libdir"])
OMPI_CHECK_PACKAGE([$1],
[mxm/api/mxm_api.h],
[mxm],
[mxm_cleanup],
[],
[$ompi_check_mxm_dir],
[$ompi_check_mxm_libdir],
[ompi_check_mxm_happy="yes"],
[ompi_check_mxm_happy="no"])],
[ompi_check_mxm_happy="no"])
# Restore the global flags saved above
CPPFLAGS="$ompi_check_mxm_$1_save_CPPFLAGS"
LDFLAGS="$ompi_check_mxm_$1_save_LDFLAGS"
LIBS="$ompi_check_mxm_$1_save_LIBS"
AS_IF([test "$ompi_check_mxm_happy" = "yes"],
[$2],
[AS_IF([test ! -z "$with_mxm" -a "$with_mxm" != "no"],
[AC_MSG_ERROR([MXM support requested but not found. Aborting])])
$3])
])

49
ompi/mca/mtl/mxm/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,49 @@
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Preprocessor flags for the MXM headers; mtl_mxm_CPPFLAGS is substituted
# by this component's configure.m4.
AM_CPPFLAGS = $(mtl_mxm_CPPFLAGS)
# Help-message catalog referenced by the component's orte_show_help() calls.
dist_pkgdata_DATA = help-mtl-mxm.txt
# Sources shared by the DSO and the static variant of the component.
mtl_mxm_sources = \
mtl_mxm.c \
mtl_mxm.h \
mtl_mxm_cancel.c \
mtl_mxm_component.c \
mtl_mxm_endpoint.c \
mtl_mxm_endpoint.h \
mtl_mxm_probe.c \
mtl_mxm_recv.c \
mtl_mxm_request.h \
mtl_mxm_send.c \
mtl_mxm_debug.h \
mtl_mxm_types.h
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_ompi_mtl_mxm_DSO
component_noinst =
component_install = mca_mtl_mxm.la
else
component_noinst = libmca_mtl_mxm.la
component_install =
endif
# DSO variant: installed into $(pkglibdir).
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_mtl_mxm_la_SOURCES = $(mtl_mxm_sources)
mca_mtl_mxm_la_LIBADD = $(mtl_mxm_LIBS)
mca_mtl_mxm_la_LDFLAGS = -module -avoid-version $(mtl_mxm_LDFLAGS)
# Static variant: built but not installed on its own.
noinst_LTLIBRARIES = $(component_noinst)
libmca_mtl_mxm_la_SOURCES = $(mtl_mxm_sources)
libmca_mtl_mxm_la_LIBADD = $(mtl_mxm_LIBS)
libmca_mtl_mxm_la_LDFLAGS = -module -avoid-version $(mtl_mxm_LDFLAGS)

32
ompi/mca/mtl/mxm/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,32 @@
# -*- shell-script -*-
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ompi_mtl_mxm_CONFIG([action-if-can-compile],
# [action-if-cant-compile])
# ------------------------------------------------
# Runs OMPI_CHECK_MXM with the prefix mtl_mxm.  On success the wrapper
# compiler LDFLAGS and LIBS for this component are set from the probe
# results and action-if-can-compile is run; otherwise
# action-if-cant-compile is run.
AC_DEFUN([MCA_ompi_mtl_mxm_CONFIG],[
AC_CONFIG_FILES([ompi/mca/mtl/mxm/Makefile])
OMPI_CHECK_MXM([mtl_mxm],
[mtl_mxm_happy="yes"],
[mtl_mxm_happy="no"])
AS_IF([test "$mtl_mxm_happy" = "yes"],
[mtl_mxm_WRAPPER_EXTRA_LDFLAGS="$mtl_mxm_LDFLAGS"
mtl_mxm_WRAPPER_EXTRA_LIBS="$mtl_mxm_LIBS"
$1],
[$2])
# substitute in the things needed to build mxm
AC_SUBST([mtl_mxm_CFLAGS])
AC_SUBST([mtl_mxm_CPPFLAGS])
AC_SUBST([mtl_mxm_LDFLAGS])
AC_SUBST([mtl_mxm_LIBS])
])dnl

67
ompi/mca/mtl/mxm/help-mtl-mxm.txt Обычный файл
Просмотреть файл

@ -0,0 +1,67 @@
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
[no uuid present]
Error obtaining unique transport key from ORTE (orte_precondition_transports %s
the environment).
Local host: %s
[unable to create endpoint]
MXM was unable to create an endpoint. Please make sure that the network link is
active on the node and the hardware is functioning.
Error: %s
[unable to extract endpoint ib address]
MXM was unable to read IB settings for endpoint
Error: %s
[unable to extract endpoint local address]
MXM was unable to read shmem settings for endpoint
Error: %s
[mxm mq create]
Failed to create MQ for endpoint
Error: %s
[errors during mxm_progress]
Error %s occurred in attempting to make network progress (mxm_progress).
[mxm init]
Initialization of MXM library failed.
Error: %s
[error polling network]
Error %s occurred in attempting to make network progress (mxm_mq_ipeek).
[error posting receive]
Unable to post application receive buffer
Error: %s
Buffer: %p
Length: %d
[error posting send]
Unable to post application send buffer
Error: %s
[error while waiting in send]
An error occurred while waiting for the send to complete
Error: %s

274
ompi/mca/mtl/mxm/mtl_mxm.c Обычный файл
Просмотреть файл

@ -0,0 +1,274 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "orte/util/show_help.h"
#include "orte/util/proc_info.h"
#include "ompi/mca/mtl/mtl.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
#include "ompi/proc/proc.h"
#include "ompi/communicator/communicator.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_endpoint.h"
#include "mtl_mxm_request.h"
#include "mtl_mxm_debug.h"
/*
 * The MXM MTL module instance.
 *
 * The four numeric fields of the base module are zero here; the context-id,
 * tag and request-size values are assigned in ompi_mtl_mxm_component_init().
 * The remaining entries are the MTL entry points implemented by this
 * component.  Members that follow the base module (verbose, mxm_context,
 * ep) have no initializer and therefore start out zeroed.
 */
mca_mtl_mxm_module_t ompi_mtl_mxm = {
{
0, /* max context id */
0, /* max tag value */
0, /* request reserve space */
0, /* flags */
ompi_mtl_mxm_add_procs,
ompi_mtl_mxm_del_procs,
ompi_mtl_mxm_finalize,
ompi_mtl_mxm_send,
ompi_mtl_mxm_isend,
ompi_mtl_mxm_irecv,
ompi_mtl_mxm_iprobe,
ompi_mtl_mxm_cancel,
ompi_mtl_mxm_add_comm,
ompi_mtl_mxm_del_comm
}
};
/*
 * Derive a 32-bit job identifier from the transport key published in the
 * OMPI_MCA_orte_precondition_transports environment variable.
 *
 * The variable must hold exactly 33 characters of the form
 * "<16 hex digits>-<16 hex digits>".  The two parsed 64-bit halves are
 * viewed as eight 16-bit words which are XOR-folded into the result; the
 * low 8 bits of the result are always cleared.
 *
 * Returns 0 (after emitting the "no uuid present" help message) when the
 * variable is missing or malformed; the caller treats 0 as failure.
 */
static uint32_t ompi_mtl_mxm_get_job_id(void)
{
    unsigned long long uu[2] = { 0, 0 };
    uint16_t jkp[8];
    uint32_t job_key;
    const char *generated_key;

    generated_key = getenv("OMPI_MCA_orte_precondition_transports");

    if (NULL == generated_key || 33 != strlen(generated_key) ||
        2 != sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1])) {
        orte_show_help("help-mtl-mxm.txt", "no uuid present", true,
                       generated_key ? "could not be parsed from" :
                       "not present in", orte_process_info.nodename);
        return 0;
    }

    /*
     * Copy the key bytes into 16-bit words instead of aliasing the buffer
     * through differently-typed pointers: this keeps the same byte layout
     * while avoiding strict-aliasing and alignment problems.
     */
    memcpy(jkp, uu, sizeof(jkp));

    job_key = ((jkp[2] ^ jkp[3]) >> 8) | ((jkp[0] ^ jkp[1]) << 8);
    job_key ^= ((jkp[6] ^ jkp[7]) >> 8) | ((jkp[4] ^ jkp[5]) << 8);
    job_key &= ~0xff;
    return job_key;
}
int ompi_mtl_mxm_progress(void);
/*
 * Query the local endpoint address of one PTL and store it in
 * ep_info->ptl_addr[ptlid].
 *
 * @param ep_info  connection-info record to fill in
 * @param ptlid    PTL whose address is requested
 * @return OMPI_SUCCESS, or OMPI_ERROR after showing a help message
 */
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm_ptl_id_t ptlid)
{
    size_t addrlen;
    mxm_error_t err;

    addrlen = sizeof(ep_info->ptl_addr[ptlid]);
    err = mxm_ep_address(ompi_mtl_mxm.ep, ptlid,
                         (struct sockaddr *) &ep_info->ptl_addr[ptlid], &addrlen);
    if (MXM_OK != err) {
        /*
         * help-mtl-mxm.txt only defines the "ib" and "local" variants of
         * this topic; pick the one matching the PTL that failed.
         */
        const char *topic = (MXM_PTL_RDMA == ptlid) ?
            "unable to extract endpoint ib address" :
            "unable to extract endpoint local address";

        orte_show_help("help-mtl-mxm.txt", topic, true, mxm_error_string(err));
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}
/*
 * Create the MXM endpoint for this process, publish its PTL addresses
 * through the modex and register the progress callback.
 *
 * @return OMPI_SUCCESS on success, OMPI_ERROR otherwise.  If a step after
 *         endpoint creation fails, the endpoint is destroyed again.
 */
int ompi_mtl_mxm_module_init(void)
{
    struct sockaddr_mxm_local_proc sa_bind_self;
    struct sockaddr_mxm_ib_local sa_bind_rdma;
    mxm_ep_opts_t ep_opt;
    ompi_mtl_mxm_ep_conn_info_t ep_info;
    mxm_error_t err;
    uint32_t jobid;

    jobid = ompi_mtl_mxm_get_job_id();
    if (0 == jobid) {
        MXM_ERROR("Failed to generate jobid");
        return OMPI_ERROR;
    }

    /*
     * ep_info is sent verbatim to the other ranks; zero it so that PTL
     * slots which are not filled in below do not carry stack garbage.
     */
    memset(&ep_info, 0, sizeof(ep_info));
    memset(&sa_bind_self, 0, sizeof(sa_bind_self));
    memset(&sa_bind_rdma, 0, sizeof(sa_bind_rdma));

    /* Setup the endpoint options and local addresses to bind to. */
    mxm_fill_ep_opts(&ep_opt);

    sa_bind_self.sa_family = AF_MXM_LOCAL_PROC;
    sa_bind_self.context_id = jobid;
    sa_bind_self.process_id = getpid();

    sa_bind_rdma.sa_family = AF_MXM_IB_LOCAL;
    sa_bind_rdma.lid = 0;
    sa_bind_rdma.pkey = 0;
    sa_bind_rdma.qp_num = 0;
    sa_bind_rdma.sl = 0;

    ep_opt.ptl_bind_addr[MXM_PTL_SELF] = (struct sockaddr*)&sa_bind_self;
    ep_opt.ptl_bind_addr[MXM_PTL_RDMA] = (struct sockaddr*)&sa_bind_rdma;

    /* Open MXM endpoint */
    err = mxm_ep_create(ompi_mtl_mxm.mxm_context, &ep_opt, &ompi_mtl_mxm.ep);
    if (MXM_OK != err) {
        orte_show_help("help-mtl-mxm.txt", "unable to create endpoint", true,
                       mxm_error_string(err));
        return OMPI_ERROR;
    }

    /*
     * Get address for each PTL on this endpoint, and share it with other ranks.
     */
    if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SELF)) {
        goto err_destroy_ep;
    }
    if (OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_RDMA)) {
        goto err_destroy_ep;
    }
    if (OMPI_SUCCESS != ompi_modex_send(&mca_mtl_mxm_component.super.mtl_version,
                                        &ep_info, sizeof(ep_info))) {
        MXM_ERROR("Open MPI couldn't distribute EP connection details");
        goto err_destroy_ep;
    }

    /* Register the MXM progress function */
    opal_progress_register(ompi_mtl_mxm_progress);
    return OMPI_SUCCESS;

err_destroy_ep:
    /* Do not leave a half-initialized endpoint behind. */
    mxm_ep_destroy(ompi_mtl_mxm.ep);
    return OMPI_ERROR;
}
/*
 * Tear down the MTL: stop progress callbacks, destroy the endpoint and
 * release the MXM context.  The context pointer is reset to NULL
 * afterwards.  Always returns OMPI_SUCCESS.
 */
int ompi_mtl_mxm_finalize(struct mca_mtl_base_module_t* mtl)
{
    /* No more progress calls once the endpoint is about to go away. */
    opal_progress_unregister(ompi_mtl_mxm_progress);

    mxm_ep_destroy(ompi_mtl_mxm.ep);

    mxm_cleanup(ompi_mtl_mxm.mxm_context);
    ompi_mtl_mxm.mxm_context = NULL;

    return OMPI_SUCCESS;
}
int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
struct ompi_proc_t** procs, /*const*/
struct mca_mtl_base_endpoint_t **mtl_peer_data)
{
ompi_mtl_mxm_ep_conn_info_t **ep_info;
mxm_conn_req_t *conn_reqs;
mxm_error_t err;
size_t size;
size_t i;
int rc;
assert(mtl == &ompi_mtl_mxm.super);
/* Allocate connection requests */
conn_reqs = malloc(nprocs * sizeof *conn_reqs);
ep_info = malloc(nprocs * sizeof *ep_info);
if (NULL == conn_reqs || NULL == ep_info) {
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto bail;
}
/* Get the EP connection requests for all the processes from modex */
for (i = 0; i < nprocs; ++i) {
rc = ompi_modex_recv(&mca_mtl_mxm_component.super.mtl_version, procs[i],
(void**)&ep_info[i], &size);
if (rc != OMPI_SUCCESS || size != sizeof(ompi_mtl_mxm_ep_conn_info_t)) {
goto bail;
}
conn_reqs[i].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&ep_info[i]->ptl_addr[MXM_PTL_SELF];
conn_reqs[i].ptl_addr[MXM_PTL_SHM] = NULL;
conn_reqs[i].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&ep_info[i]->ptl_addr[MXM_PTL_RDMA];
}
/* Connect to remote peers */
err = mxm_ep_connect(ompi_mtl_mxm.ep, conn_reqs, nprocs, 1000);
if (MXM_OK != err) {
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
for (i = 0; i < nprocs; ++i) {
if (MXM_OK != conn_reqs[i].error) {
MXM_ERROR("MXM EP connect to %s error: %s\n", procs[i]->proc_hostname,
mxm_error_string(conn_reqs[i].error));
}
}
goto bail;
}
/* Save returned connections */
for (i = 0; i < nprocs; ++i) {
mtl_peer_data[i] = (mca_mtl_mxm_endpoint_t *) OBJ_NEW(mca_mtl_mxm_endpoint_t);
mtl_peer_data[i]->mtl_mxm_module = &ompi_mtl_mxm;
mtl_peer_data[i]->mxm_conn = conn_reqs[i].conn;
}
bail:
free(conn_reqs);
free(ep_info);
return rc;
}
/*
 * Disconnect from a set of peers and drop the reference held on each
 * per-peer endpoint object.  Always returns OMPI_SUCCESS.
 */
int ompi_mtl_mxm_del_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
                           struct ompi_proc_t** procs,
                           struct mca_mtl_base_endpoint_t **mtl_peer_data)
{
    size_t peer = 0;

    while (peer < nprocs) {
        mxm_ep_disconnect(mtl_peer_data[peer]->mxm_conn);
        /* Release through the array slot itself, exactly as before. */
        OBJ_RELEASE(mtl_peer_data[peer]);
        ++peer;
    }

    return OMPI_SUCCESS;
}
/*
 * Create an MXM message queue for a communicator, keyed by the
 * communicator's context id, and stash the handle in comm->c_pml_comm.
 *
 * @return OMPI_SUCCESS, or OMPI_ERROR (after a help message) if the
 *         queue could not be created.
 */
int ompi_mtl_mxm_add_comm(struct mca_mtl_base_module_t *mtl,
                          struct ompi_communicator_t *comm)
{
    mxm_mq_h queue;
    mxm_error_t status;

    assert(mtl == &ompi_mtl_mxm.super);
    assert(NULL != ompi_mtl_mxm.mxm_context);

    status = mxm_mq_create(ompi_mtl_mxm.mxm_context, comm->c_contextid, &queue);
    if (MXM_OK == status) {
        comm->c_pml_comm = (void*)queue;
        return OMPI_SUCCESS;
    }

    orte_show_help("help-mtl-mxm.txt", "mxm mq create", true, mxm_error_string(status));
    return OMPI_ERROR;
}
/*
 * Destroy the message queue attached to a communicator.  The queue is
 * only destroyed while the MXM context is still set (it is reset to NULL
 * by ompi_mtl_mxm_finalize).  Always returns OMPI_SUCCESS.
 */
int ompi_mtl_mxm_del_comm(struct mca_mtl_base_module_t *mtl,
                          struct ompi_communicator_t *comm)
{
    assert(mtl == &ompi_mtl_mxm.super);

    if (NULL == ompi_mtl_mxm.mxm_context) {
        return OMPI_SUCCESS;
    }

    mxm_mq_destroy((mxm_mq_h)comm->c_pml_comm);
    return OMPI_SUCCESS;
}
/*
 * Progress callback registered with opal_progress: drives the MXM
 * library once.  Any result other than MXM_OK or MXM_ERR_NO_PROGRESS is
 * reported through a help message.  Always returns 1.
 */
int ompi_mtl_mxm_progress(void)
{
    mxm_error_t status = mxm_progress(ompi_mtl_mxm.mxm_context);

    if (MXM_OK != status && MXM_ERR_NO_PROGRESS != status) {
        orte_show_help("help-mtl-mxm.txt", "errors during mxm_progress", true, mxm_error_string(status));
    }

    return 1;
}

71
ompi/mca/mtl/mxm/mtl_mxm.h Обычный файл
Просмотреть файл

@ -0,0 +1,71 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MTL_MXM_H_HAS_BEEN_INCLUDED
#define MTL_MXM_H_HAS_BEEN_INCLUDED
#include <stdint.h>
#include <mxm/api/mxm_api.h>
#include <mxm/api/mxm_addr.h>
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/mtl/mtl.h"
#include "ompi/mca/mtl/base/base.h"
#include "opal/datatype/opal_convertor.h"
BEGIN_C_DECLS
/* MTL interface functions */
/* Peer management: connect to / disconnect from a set of processes. */
extern int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t* mtl,
size_t nprocs, struct ompi_proc_t** procs,
struct mca_mtl_base_endpoint_t **mtl_peer_data);
extern int ompi_mtl_mxm_del_procs(struct mca_mtl_base_module_t* mtl,
size_t nprocs, struct ompi_proc_t** procs,
struct mca_mtl_base_endpoint_t **mtl_peer_data);
/* Blocking send: returns after the MXM request has been waited on. */
extern int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t* comm, int dest, int tag,
struct opal_convertor_t *convertor,
mca_pml_base_send_mode_t mode);
/* Non-blocking send: completion is signalled via the request callback. */
extern int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t* comm, int dest,
int tag, struct opal_convertor_t *convertor,
mca_pml_base_send_mode_t mode, bool blocking,
mca_mtl_request_t * mtl_request);
/* Non-blocking receive: completion is signalled via the request callback. */
extern int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t *comm, int src,
int tag, struct opal_convertor_t *convertor,
struct mca_mtl_request_t *mtl_request);
/* Probe for a matching message; *flag is set to 1 if one is found, else 0. */
extern int ompi_mtl_mxm_iprobe(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t *comm, int src,
int tag, int *flag,
struct ompi_status_public_t *status);
/* Try to cancel a previously posted request. */
extern int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl,
struct mca_mtl_request_t *mtl_request, int flag);
/* Per-communicator setup/teardown of the MXM message queue. */
extern int ompi_mtl_mxm_add_comm(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm);
extern int ompi_mtl_mxm_del_comm(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm);
/* Module shutdown. */
extern int ompi_mtl_mxm_finalize(struct mca_mtl_base_module_t* mtl);
/* Module start-up, called from the component init function. */
int ompi_mtl_mxm_module_init(void);
END_C_DECLS
#endif

36
ompi/mca/mtl/mxm/mtl_mxm_cancel.c Обычный файл
Просмотреть файл

@ -0,0 +1,36 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mtl_mxm.h"
#include "mtl_mxm_request.h"
/*
 * Attempt to cancel a posted request.
 *
 * If MXM accepts the cancel and the request then tests as complete, the
 * request is marked cancelled and its completion callback is invoked.
 * A cancel result of MXM_ERR_NO_MESSAGE is treated as success without
 * touching the request.  Every other outcome yields OMPI_ERROR.
 */
int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl,
                        struct mca_mtl_request_t *mtl_request, int flag)
{
    mca_mtl_mxm_request_t *mxm_req = (mca_mtl_mxm_request_t*) mtl_request;
    mxm_error_t status;

    status = mxm_req_cancel(&mxm_req->mxm_request);
    if (MXM_ERR_NO_MESSAGE == status) {
        return OMPI_SUCCESS;
    }
    if (MXM_OK != status) {
        return OMPI_ERROR;
    }

    status = mxm_req_test(&mxm_req->mxm_request);
    if (MXM_OK != status) {
        return OMPI_ERROR;
    }

    mtl_request->ompi_req->req_status._cancelled = true;
    mxm_req->super.completion_callback(&mxm_req->super);
    return OMPI_SUCCESS;
}

122
ompi/mca/mtl/mxm/mtl_mxm_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,122 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "orte/util/show_help.h"
#include "opal/mca/event/event.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/proc/proc.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_request.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
static int ompi_mtl_mxm_component_open(void);
static int ompi_mtl_mxm_component_close(void);
static int ompi_mtl_mxm_component_register(void);
int mca_mtl_mxm_output = -1;
static mca_mtl_base_module_t
* ompi_mtl_mxm_component_init(bool enable_progress_threads,
bool enable_mpi_threads);
/*
 * Component descriptor for the "mxm" MTL: version information, the
 * open/close/register hooks defined in this file, and the init function
 * that returns the module.
 */
mca_mtl_mxm_component_t mca_mtl_mxm_component = {
{
/*
* First, the mca_base_component_t struct containing meta
* information about the component itself
*/
{
MCA_MTL_BASE_VERSION_2_0_0,
"mxm", /* MCA component name */
OMPI_MAJOR_VERSION, /* MCA component major version */
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
ompi_mtl_mxm_component_open, /* component open */
ompi_mtl_mxm_component_close, /* component close */
NULL,
ompi_mtl_mxm_component_register
},
{
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
ompi_mtl_mxm_component_init /* component init */
}
};
/*
 * Register the component's MCA parameters.  Currently only "verbose"
 * (default 0), whose value is stored in ompi_mtl_mxm.verbose.
 */
static int ompi_mtl_mxm_component_register(void)
{
    mca_base_param_reg_int(&mca_mtl_mxm_component.super.mtl_version,
                           "verbose",
                           "Verbose level of the MXM component",
                           false, false, 0,
                           &ompi_mtl_mxm.verbose);

    return OMPI_SUCCESS;
}
/*
 * Component open hook.  The component reports itself as available only
 * when /dev/infiniband/uverbs0 can be stat()ed, which is used as the
 * indicator that IB hardware is present.
 */
static int ompi_mtl_mxm_component_open(void)
{
    struct stat dev_info;

    if (0 != stat("/dev/infiniband/uverbs0", &dev_info)) {
        return OPAL_ERR_NOT_AVAILABLE;
    }

    return OMPI_SUCCESS;
}
/* Component close hook: nothing to release here. */
static int ompi_mtl_mxm_component_close(void)
{
    return OMPI_SUCCESS;
}
/*
 * Component init: open the output stream, initialize the MXM library,
 * run module initialization and fill in the MTL limits.
 *
 * @return pointer to the MXM MTL module, or NULL if MXM or the module
 *         could not be initialized.  When module initialization fails the
 *         MXM context created here is released again.
 */
static mca_mtl_base_module_t*
ompi_mtl_mxm_component_init(bool enable_progress_threads,
                            bool enable_mpi_threads)
{
    mxm_context_opts_t mxm_opts;
    mxm_error_t err;
    int rc;

    mca_mtl_mxm_output = opal_output_open(NULL);
    opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose);

    mxm_fill_context_opts(&mxm_opts);
    err = mxm_init(&mxm_opts, &ompi_mtl_mxm.mxm_context);
    if (MXM_OK != err) {
        orte_show_help("help-mtl-mxm.txt", "mxm init", true,
                       mxm_error_string(err));
        return NULL;
    }

    rc = ompi_mtl_mxm_module_init();
    if (OMPI_SUCCESS != rc) {
        /* The module is unusable: do not leak the context from mxm_init. */
        mxm_cleanup(ompi_mtl_mxm.mxm_context);
        ompi_mtl_mxm.mxm_context = NULL;
        return NULL;
    }

    /* Calculate MTL constraints according to MXM types */
    /* NOTE(review): the context-id limit is 2^bits rather than 2^bits - 1
     * and may not fit the destination field for wide mxm_ctxid_t - confirm. */
    ompi_mtl_mxm.super.mtl_max_contextid = 1UL << (sizeof(mxm_ctxid_t) * 8);
    ompi_mtl_mxm.super.mtl_max_tag = 1UL << (sizeof(mxm_tag_t) * 8 - 2);
    /* Extra per-request space needed beyond the base MTL request. */
    ompi_mtl_mxm.super.mtl_request_size =
        sizeof(mca_mtl_mxm_request_t) - sizeof(struct mca_mtl_request_t);
    return &ompi_mtl_mxm.super;
}

34
ompi/mca/mtl/mxm/mtl_mxm_debug.h Обычный файл
Просмотреть файл

@ -0,0 +1,34 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MTL_MXM_DEBUG_H
#define MTL_MXM_DEBUG_H
#pragma GCC system_header
#ifdef __BASE_FILE__
#define __MXM_FILE__ __BASE_FILE__
#else
#define __MXM_FILE__ __FILE__
#endif
#define MXM_VERBOSE(level, format, ...) \
opal_output_verbose(level, mca_mtl_mxm_output, "%s:%d - %s() " format, \
__MXM_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
#define MXM_ERROR(format, ... ) \
opal_output_verbose(0, mca_mtl_mxm_output, "Error: %s:%d - %s() " format, \
__MXM_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
#define MXM_MODULE_VERBOSE(mxm_module, level, format, ...) \
MXM_VERBOSE(level, "[%d] " format, (mxm_module)->rank, ## __VA_ARGS__)
extern int mca_mtl_mxm_output;
#endif

42
ompi/mca/mtl/mxm/mtl_mxm_endpoint.c Обычный файл
Просмотреть файл

@ -0,0 +1,42 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <sys/time.h>
#include <time.h>
#include "ompi/types.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_endpoint.h"
/*
 * Object constructor for an endpoint: the owning-module pointer starts
 * out as NULL.  The connection handle is left for the creator to set.
 */
static void mca_mtl_mxm_endpoint_construct(mca_mtl_mxm_endpoint_t* endpoint)
{
    endpoint->mtl_mxm_module = NULL;
}
/*
 * Object destructor for an endpoint.  Intentionally empty: the endpoint
 * owns no resources that are released here.
 */
static void mca_mtl_mxm_endpoint_destruct(mca_mtl_mxm_endpoint_t* endpoint)
{
    /* nothing to do */
}
/* Class descriptor for mca_mtl_mxm_endpoint_t: derives from
 * opal_list_item_t and uses the constructor/destructor above. */
OBJ_CLASS_INSTANCE(
mca_mtl_mxm_endpoint_t,
opal_list_item_t,
mca_mtl_mxm_endpoint_construct,
mca_mtl_mxm_endpoint_destruct);

43
ompi/mca/mtl/mxm/mtl_mxm_endpoint.h Обычный файл
Просмотреть файл

@ -0,0 +1,43 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_MTL_MXM_ENDPOINT_H
#define MCA_MTL_MXM_ENDPOINT_H
#include "opal/class/opal_list.h"
#include "opal/mca/event/event.h"
#include "ompi/mca/mtl/mtl.h"
#include "mtl_mxm.h"
BEGIN_C_DECLS
OBJ_CLASS_DECLARATION(mca_mtl_mxm_endpoint_t);
/**
 * Per-peer endpoint of the MXM MTL.
 * One instance is created for every peer in ompi_mtl_mxm_add_procs(),
 * which stores the connection handle produced by mxm_ep_connect() in it;
 * ompi_mtl_mxm_del_procs() disconnects and releases it.
 */
struct mca_mtl_base_endpoint_t {
opal_list_item_t super;
struct mca_mtl_mxm_module_t* mtl_mxm_module;
/**< MTL instance that created this connection */
mxm_conn_h mxm_conn;
/**< MXM Connection handle*/
};
typedef struct mca_mtl_base_endpoint_t mca_mtl_base_endpoint_t;
typedef mca_mtl_base_endpoint_t mca_mtl_mxm_endpoint_t;
/* NOTE(review): this declares a class object for "mca_mtl_mxm_endpoint"
 * (no _t suffix); no matching OBJ_CLASS_INSTANCE is visible - confirm
 * whether it is needed. */
OBJ_CLASS_DECLARATION(mca_mtl_mxm_endpoint);
END_C_DECLS
#endif

45
ompi/mca/mtl/mxm/mtl_mxm_probe.c Обычный файл
Просмотреть файл

@ -0,0 +1,45 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "ompi/communicator/communicator.h"
/*
 * Non-blocking probe for a message matching (src, tag) on a communicator.
 *
 * @param flag    set to 1 if a matching message is available, 0 if not
 * @param status  filled with source, tag, error and length of the match
 *                unless it is MPI_STATUS_IGNORE
 * @return OMPI_SUCCESS whether or not a message was found, OMPI_ERROR if
 *         the probe itself failed.
 */
int ompi_mtl_mxm_iprobe(struct mca_mtl_base_module_t* mtl,
                        struct ompi_communicator_t *comm, int src, int tag,
                        int *flag, struct ompi_status_public_t *status)
{
    mxm_error_t err;
    mxm_req_t req;

    req.state = MXM_REQ_NEW;
    req.mq = (mxm_mq_h)comm->c_pml_comm;
    req.tag = tag;
    /* A zero mask matches any tag. */
    req.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
    /* A NULL connection matches any source. */
    req.conn = (src == MPI_ANY_SOURCE) ? NULL : ompi_mtl_mxm_conn_lookup(comm, src);

    err = mxm_req_probe(&req);
    if (MXM_OK == err) {
        *flag = 1;
        if (MPI_STATUS_IGNORE != status) {
            /*
             * Senders put their communicator rank into the immediate data,
             * and the receive completion path reads the source from there.
             * Do the same here instead of dereferencing a connection
             * context, which is never set and whose connection is NULL for
             * MPI_ANY_SOURCE probes.
             */
            status->MPI_SOURCE = req.completion.sender_imm;
            status->MPI_TAG = req.completion.sender_tag;
            status->MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req.completion.status);
            status->_ucount = req.completion.actual_len;
        }
        return OMPI_SUCCESS;
    } else if (MXM_ERR_NO_MESSAGE == err) {
        *flag = 0;
        return OMPI_SUCCESS;
    } else {
        return OMPI_ERROR;
    }
}

90
ompi/mca/mtl/mxm/mtl_mxm_recv.c Обычный файл
Просмотреть файл

@ -0,0 +1,90 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
#include "orte/util/show_help.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_request.h"
/*
 * Completion callback for receives posted by ompi_mtl_mxm_irecv().
 * Fills the MPI status from the MXM completion record, unpacks the
 * received bytes through the convertor, frees the staging buffer when
 * one was allocated, and finally signals completion of the MTL request.
 */
static void ompi_mtl_mxm_recv_completion_cb(mxm_req_t *req)
{
    mca_mtl_mxm_request_t *recv_req = (mca_mtl_mxm_request_t *) req->context;
    struct ompi_request_t *mpi_req = recv_req->super.ompi_req;

    /* Envelope and completion status */
    mpi_req->req_status.MPI_TAG = req->completion.sender_tag;
    mpi_req->req_status.MPI_SOURCE = req->completion.sender_imm;
    mpi_req->req_status.MPI_ERROR = ompi_mtl_mxm_to_mpi_status(req->completion.status);
    mpi_req->req_status._ucount = req->completion.actual_len;

    /* Move the payload into the user buffer */
    ompi_mtl_datatype_unpack(recv_req->convertor, recv_req->buf,
                             req->completion.actual_len);

    if (recv_req->free_after) {
        free(recv_req->buf);
    }

    recv_req->super.completion_callback(&recv_req->super);
}
/*
 * Post a non-blocking receive.
 *
 * A receive buffer is obtained from the convertor, an MXM request
 * embedded in the MTL request is filled in, and the request is posted.
 * Completion is reported through ompi_mtl_mxm_recv_completion_cb().
 *
 * @return OMPI_SUCCESS, the error from buffer preparation, or OMPI_ERROR
 *         if MXM refused the request (a staging buffer allocated for the
 *         request is released in that case).
 */
int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
                       struct ompi_communicator_t *comm, int src, int tag,
                       struct opal_convertor_t *convertor,
                       struct mca_mtl_request_t *mtl_request)
{
    mca_mtl_mxm_request_t * mtl_mxm_request;
    mxm_error_t err;
    int ret;

    mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
    mtl_mxm_request->convertor = convertor;

    ret = ompi_mtl_datatype_recv_buf(mtl_mxm_request->convertor,
                                     &mtl_mxm_request->buf,
                                     &mtl_mxm_request->length,
                                     &mtl_mxm_request->free_after);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    /* prepare a receive request embedded in the MTL request */
    mtl_mxm_request->mxm_request.state = MXM_REQ_NEW;
    mtl_mxm_request->mxm_request.mq = (mxm_mq_h)comm->c_pml_comm;
    mtl_mxm_request->mxm_request.tag = tag;
    mtl_mxm_request->mxm_request.tag_mask = (tag == MPI_ANY_TAG) ? 0 : 0xffffffffU;
    mtl_mxm_request->mxm_request.conn = (src == MPI_ANY_SOURCE) ? NULL :
                                        ompi_mtl_mxm_conn_lookup(comm, src);
    mtl_mxm_request->mxm_request.data.buf.ptr = mtl_mxm_request->buf;
    mtl_mxm_request->mxm_request.data.buf.len = mtl_mxm_request->length;
    mtl_mxm_request->mxm_request.completed_cb = ompi_mtl_mxm_recv_completion_cb;
    mtl_mxm_request->mxm_request.context = mtl_mxm_request;
    mtl_mxm_request->mxm_request.flags = MXM_REQ_FLAG_NONBLOCK;

    /* post-recv */
    err = mxm_req_recv(&mtl_mxm_request->mxm_request);
    if (MXM_OK != err) {
        /* The help text prints the length with %d, so pass an int. */
        orte_show_help("help-mtl-mxm.txt", "error posting receive", true,
                       mxm_error_string(err), mtl_mxm_request->buf,
                       (int) mtl_mxm_request->length);
        /* The completion callback will never run; release the staging
         * buffer here so it is not leaked. */
        if (mtl_mxm_request->free_after) {
            free(mtl_mxm_request->buf);
            mtl_mxm_request->buf = NULL;
            mtl_mxm_request->free_after = false;
        }
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}

28
ompi/mca/mtl/mxm/mtl_mxm_request.h Обычный файл
Просмотреть файл

@ -0,0 +1,28 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_MTL_MXM_REQUEST_H
#define OMPI_MTL_MXM_REQUEST_H
#include "opal/datatype/opal_convertor.h"
#include "mtl_mxm.h"
/*
 * MXM-specific MTL request.  It extends the base MTL request with the
 * embedded MXM request plus the staging-buffer bookkeeping used by the
 * send and receive paths.
 */
struct mca_mtl_mxm_request_t {
struct mca_mtl_request_t super;
/* MXM request posted to the library; its context points back here */
mxm_req_t mxm_request;
/* mxm_segment_t mxm_segment[1]; */
/* data buffer handed to MXM and its length in bytes */
void *buf;
size_t length;
/* convertor used to pack/unpack the user data */
struct opal_convertor_t *convertor;
/* true when buf must be free()d once the request completes */
bool free_after;
};
typedef struct mca_mtl_mxm_request_t mca_mtl_mxm_request_t;
#endif

136
ompi/mca/mtl/mxm/mtl_mxm_send.c Обычный файл
Просмотреть файл

@ -0,0 +1,136 @@
/* * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mca/pml/pml.h"
#include "opal/datatype/opal_convertor.h"
#include "orte/util/show_help.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_request.h"
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
/*
 * Completion callback for sends posted by ompi_mtl_mxm_isend().
 * Frees the packed buffer when one was allocated, translates the MXM
 * completion status into an MPI error code on the request, and signals
 * completion of the MTL request.
 */
static void ompi_mtl_mxm_send_completion_cb(mxm_req_t *req)
{
    mca_mtl_mxm_request_t *send_req = (mca_mtl_mxm_request_t *) req->context;
    int mpi_error;

    if (send_req->free_after) {
        free(send_req->buf);
    }

    if (MXM_OK == req->completion.status) {
        mpi_error = OMPI_SUCCESS;
    } else if (MXM_ERR_MESSAGE_TRUNCATED == req->completion.status) {
        mpi_error = MPI_ERR_TRUNCATE;
    } else {
        mpi_error = MPI_ERR_INTERN;
    }
    send_req->super.ompi_req->req_status.MPI_ERROR = mpi_error;

    send_req->super.completion_callback(&send_req->super);
}
/*
 * Blocking send: pack the data, post an MXM send request on the stack
 * and wait for it to complete.
 *
 * @return OMPI_SUCCESS, the error from packing, or OMPI_ERROR if posting
 *         or waiting failed.  A buffer allocated by the pack step is
 *         released on every path once MXM is done with it.
 */
int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
                      struct ompi_communicator_t* comm, int dest, int tag,
                      struct opal_convertor_t *convertor,
                      mca_pml_base_send_mode_t mode)
{
    mxm_req_t mxm_req;
    bool free_after;
    mxm_error_t err;
    int ret;

    /* prepare local send request */
    mxm_req.state = MXM_REQ_NEW;
    mxm_req.mq = ompi_mtl_mxm_mq_lookup(comm);
    mxm_req.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
    mxm_req.tag = tag;
    /* The receiver reads the source rank from the immediate data. */
    mxm_req.imm_data = ompi_comm_rank(comm);
    mxm_req.completed_cb = NULL;
    mxm_req.flags = 0;
    if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
        mxm_req.flags |= MXM_REQ_FLAG_SEND_SYNC;
    }

    ret = ompi_mtl_datatype_pack(convertor, &mxm_req.data.buf.ptr, &mxm_req.data.buf.len,
                                 &free_after);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    /* post-send */
    err = mxm_req_send(&mxm_req);
    if (MXM_OK != err) {
        /* The help text takes a single %s argument. */
        orte_show_help("help-mtl-mxm.txt", "error posting send", true, mxm_error_string(err));
        ret = OMPI_ERROR;
        goto out;
    }

    /* wait for request completion */
    err = mxm_req_wait(&mxm_req);
    if (MXM_OK != err) {
        orte_show_help("help-mtl-mxm.txt", "error while waiting in send", true, mxm_error_string(err));
        ret = OMPI_ERROR;
        goto out;
    }

    ret = OMPI_SUCCESS;

out:
    /* Release the temporary packed copy, if the pack step made one. */
    if (free_after) {
        free(mxm_req.data.buf.ptr);
    }
    return ret;
}
/*
 * Non-blocking send: pack the data, fill in the MXM request embedded in
 * the MTL request and post it.  Completion is reported through
 * ompi_mtl_mxm_send_completion_cb().
 *
 * @return OMPI_SUCCESS, the error from packing, or OMPI_ERROR if MXM
 *         refused the request (a buffer allocated by the pack step is
 *         released in that case).
 */
int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
                       struct ompi_communicator_t* comm, int dest, int tag,
                       struct opal_convertor_t *convertor,
                       mca_pml_base_send_mode_t mode, bool blocking,
                       mca_mtl_request_t * mtl_request)
{
    mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *)mtl_request;
    mxm_error_t err;
    int ret;

    assert(mtl == &ompi_mtl_mxm.super);

    mtl_mxm_request->convertor = convertor;
    ret = ompi_mtl_datatype_pack(mtl_mxm_request->convertor,
                                 &mtl_mxm_request->buf,
                                 &mtl_mxm_request->length,
                                 &mtl_mxm_request->free_after);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    /* prepare a send request embedded in the MTL request */
    mtl_mxm_request->mxm_request.state = MXM_REQ_NEW;
    mtl_mxm_request->mxm_request.mq = ompi_mtl_mxm_mq_lookup(comm);
    mtl_mxm_request->mxm_request.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
    mtl_mxm_request->mxm_request.tag = tag;
    /* The receiver reads the source rank from the immediate data. */
    mtl_mxm_request->mxm_request.imm_data = ompi_comm_rank(comm);
    mtl_mxm_request->mxm_request.data.buf.ptr = mtl_mxm_request->buf;
    mtl_mxm_request->mxm_request.data.buf.len = mtl_mxm_request->length;
    mtl_mxm_request->mxm_request.completed_cb = ompi_mtl_mxm_send_completion_cb;
    mtl_mxm_request->mxm_request.context = mtl_mxm_request;
    mtl_mxm_request->mxm_request.flags = MXM_REQ_FLAG_NONBLOCK;
    if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
        mtl_mxm_request->mxm_request.flags |= MXM_REQ_FLAG_SEND_SYNC;
    }

    /* post-send */
    err = mxm_req_send(&mtl_mxm_request->mxm_request);
    if (MXM_OK != err) {
        /* The help text takes a single %s argument. */
        orte_show_help("help-mtl-mxm.txt", "error posting send", true, mxm_error_string(err));
        /* The completion callback will never run; release the packed
         * buffer here so it is not leaked. */
        if (mtl_mxm_request->free_after) {
            free(mtl_mxm_request->buf);
            mtl_mxm_request->buf = NULL;
            mtl_mxm_request->free_after = false;
        }
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}

73
ompi/mca/mtl/mxm/mtl_mxm_types.h Обычный файл
Просмотреть файл

@ -0,0 +1,73 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MTL_MXM_TYPES_H_HAS_BEEN_INCLUDED
#define MTL_MXM_TYPES_H_HAS_BEEN_INCLUDED
#include "ompi_config.h"
#include "mtl_mxm.h"
#include "ompi/mca/mtl/mtl.h"
#include "ompi/mca/mtl/base/base.h"
#include "ompi/communicator/communicator.h"
#include "mtl_mxm_endpoint.h"
BEGIN_C_DECLS
/**
* MTL Module Interface
*
* State of the MXM MTL: the base module plus the MXM library context and
* the local endpoint created during initialization.
*/
typedef struct mca_mtl_mxm_module_t {
mca_mtl_base_module_t super; /**< base MTL interface */
/* value of the "verbose" MCA parameter */
int verbose;
/* library context returned by mxm_init(); NULL after finalize */
mxm_h mxm_context;
/* local endpoint returned by mxm_ep_create() */
mxm_ep_h ep;
} mca_mtl_mxm_module_t;
/* Connection record exchanged through the modex: one address slot per
 * PTL, indexed by PTL id. */
typedef struct ompi_mtl_mxm_ep_conn_info_t {
struct sockaddr_storage ptl_addr[MXM_PTL_LAST];
} ompi_mtl_mxm_ep_conn_info_t;
/* The module instance, defined in mtl_mxm.c */
extern mca_mtl_mxm_module_t ompi_mtl_mxm;
typedef struct mca_mtl_mxm_component_t {
mca_mtl_base_component_2_0_0_t super; /**< base MTL component */
} mca_mtl_mxm_component_t;
/* The component descriptor, defined in mtl_mxm_component.c */
OMPI_DECLSPEC mca_mtl_mxm_component_t mca_mtl_mxm_component;
/*
 * Return the MXM connection handle for the given rank of a communicator.
 * The handle is read from the endpoint stored in the peer's proc_pml slot.
 */
static inline mxm_conn_h ompi_mtl_mxm_conn_lookup(struct ompi_communicator_t* comm, int rank) {
    ompi_proc_t *peer = ompi_comm_peer_lookup(comm, rank);

    return ((mca_mtl_mxm_endpoint_t *) peer->proc_pml)->mxm_conn;
}
static inline mxm_mq_h ompi_mtl_mxm_mq_lookup(struct ompi_communicator_t* comm) {
return (mxm_mq_h)comm->c_pml_comm;
}
/*
 * Translate an MXM completion status into an MPI error code:
 * MXM_OK -> OMPI_SUCCESS, MXM_ERR_MESSAGE_TRUNCATED -> MPI_ERR_TRUNCATE,
 * anything else -> MPI_ERR_INTERN.
 */
static inline int ompi_mtl_mxm_to_mpi_status(mxm_error_t status) {
    switch (status) {
    case MXM_OK:
        return OMPI_SUCCESS;
    case MXM_ERR_MESSAGE_TRUNCATED:
        return MPI_ERR_TRUNCATE;
    default:
        return MPI_ERR_INTERN;
    }
}
END_C_DECLS
#endif