
Merge pull request #5180 from bwbarrett/feature/remove-mxm

mtl: remove MXM MTL
This commit is contained in:
Joshua Ladd 2018-05-23 09:31:42 -04:00 committed by GitHub
parents 385d91bbd2 09e4c40ce9
commit 8231706ad8
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 0 additions and 2135 deletions

View file: ompi/mca/mtl/mxm/Makefile.am

@@ -1,51 +0,0 @@
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
AM_CPPFLAGS = $(mtl_mxm_CPPFLAGS)
dist_ompidata_DATA = help-mtl-mxm.txt
mtl_mxm_sources = \
mtl_mxm.c \
mtl_mxm.h \
mtl_mxm_cancel.c \
mtl_mxm_component.c \
mtl_mxm_endpoint.c \
mtl_mxm_endpoint.h \
mtl_mxm_probe.c \
mtl_mxm_recv.c \
mtl_mxm_request.h \
mtl_mxm_send.c \
mtl_mxm_debug.h \
mtl_mxm_types.h
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_ompi_mtl_mxm_DSO
component_noinst =
component_install = mca_mtl_mxm.la
else
component_noinst = libmca_mtl_mxm.la
component_install =
endif
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_mtl_mxm_la_SOURCES = $(mtl_mxm_sources)
mca_mtl_mxm_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \
$(mtl_mxm_LIBS)
mca_mtl_mxm_la_LDFLAGS = -module -avoid-version $(mtl_mxm_LDFLAGS)
noinst_LTLIBRARIES = $(component_noinst)
libmca_mtl_mxm_la_SOURCES = $(mtl_mxm_sources)
libmca_mtl_mxm_la_LIBADD = $(mtl_mxm_LIBS)
libmca_mtl_mxm_la_LDFLAGS = -module -avoid-version $(mtl_mxm_LDFLAGS)

View file: ompi/mca/mtl/mxm/configure.m4

@@ -1,39 +0,0 @@
# -*- shell-script -*-
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
# Copyright (c) 2013 Sandia National Laboratories. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ompi_mtl_mxm_POST_CONFIG(will_build)
# ----------------------------------------
# Only require the tag if we're actually going to be built
AC_DEFUN([MCA_ompi_mtl_mxm_POST_CONFIG], [
AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([MTL])])
])dnl
# MCA_mtl_mxm_CONFIG([action-if-can-compile],
# [action-if-cant-compile])
# ------------------------------------------------
AC_DEFUN([MCA_ompi_mtl_mxm_CONFIG],[
AC_CONFIG_FILES([ompi/mca/mtl/mxm/Makefile])
OMPI_CHECK_MXM([mtl_mxm],
[mtl_mxm_happy="yes"],
[mtl_mxm_happy="no"])
AS_IF([test "$mtl_mxm_happy" = "yes"],
[$1],
[$2])
# substitute in the things needed to build mxm
AC_SUBST([mtl_mxm_CFLAGS])
AC_SUBST([mtl_mxm_CPPFLAGS])
AC_SUBST([mtl_mxm_LDFLAGS])
AC_SUBST([mtl_mxm_LIBS])
])dnl

View file: ompi/mca/mtl/mxm/help-mtl-mxm.txt

@@ -1,67 +0,0 @@
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
[no uuid present]
Error obtaining unique transport key from ORTE (orte_precondition_transports %s
the environment).
Local host: %s
[unable to create endpoint]
MXM was unable to create an endpoint. Please make sure that the network link is
active on the node and the hardware is functioning.
Error: %s
[unable to extract endpoint ptl address]
MXM was unable to read settings for endpoint
PTL ID: %d
Error: %s
[unable to extract endpoint address]
MXM was unable to read settings for endpoint
Error: %s
[mxm mq create]
Failed to create MQ for endpoint
Error: %s
[errors during mxm_progress]
Error %s occurred in attempting to make network progress (mxm_progress).
[mxm init]
Initialization of MXM library failed.
Error: %s
[error posting receive]
Unable to post application receive buffer
Error: %s
Buffer: %p
Length: %d
[error posting message receive]
Unable to post application receive buffer
Error: %s
Buffer: %p
Length: %d
[error posting send]
Unable to post application send buffer
Error: %s

View file: ompi/mca/mtl/mxm/mtl_mxm.c

@@ -1,679 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (C) 2001-2011 Mellanox Technologies Ltd. ALL RIGHTS RESERVED.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mca/mtl/mtl.h"
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
#include "ompi/proc/proc.h"
#include "ompi/communicator/communicator.h"
#include "opal/memoryhooks/memory.h"
#include "opal/util/show_help.h"
#include "opal/mca/pmix/pmix.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_endpoint.h"
#include "mtl_mxm_request.h"
mca_mtl_mxm_module_t ompi_mtl_mxm = {
{
0, /* max context id */
0, /* max tag value */
0, /* request reserve space */
0, /* flags */
ompi_mtl_mxm_add_procs,
ompi_mtl_mxm_del_procs,
ompi_mtl_mxm_finalize,
ompi_mtl_mxm_send,
ompi_mtl_mxm_isend,
ompi_mtl_mxm_irecv,
ompi_mtl_mxm_iprobe,
ompi_mtl_mxm_imrecv,
ompi_mtl_mxm_improbe,
ompi_mtl_mxm_cancel,
ompi_mtl_mxm_add_comm,
ompi_mtl_mxm_del_comm
},
0,
0,
NULL,
NULL
};
#if MXM_API < MXM_VERSION(2,0)
static uint32_t ompi_mtl_mxm_get_job_id(void)
{
uint8_t unique_job_key[16];
uint32_t job_key;
unsigned long long *uu;
char *generated_key;
uu = (unsigned long long *) unique_job_key;
generated_key = getenv(OPAL_MCA_PREFIX"orte_precondition_transports");
memset(uu, 0, sizeof(unique_job_key));
if (!generated_key || (strlen(generated_key) != 33) || sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2) {
opal_show_help("help-mtl-mxm.txt", "no uuid present", true,
generated_key ? "could not be parsed from" :
"not present in", ompi_process_info.nodename);
return 0;
}
/*
* decode OPAL_MCA_PREFIX"orte_precondition_transports" that looks as
* 000003ca00000000-0000000100000000
* jobfam-stepid
* to get jobid coded with ORTE_CONSTRUCT_LOCAL_JOBID()
*/
#define GET_LOCAL_JOBID(local, job) \
( ((local) & 0xffff0000) | ((job) & 0x0000ffff) )
job_key = GET_LOCAL_JOBID((uu[0]>>(8 * sizeof(int))) << 16, uu[1]>>(8 * sizeof(int)));
return job_key;
}
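/*
 * Worked example (key string from the comment above): parsing
 * "000003ca00000000-0000000100000000" yields uu[0] = 0x000003ca00000000
 * and uu[1] = 0x0000000100000000. Assuming a 32-bit int, uu[0] >> 32 is
 * 0x000003ca, which shifted left by 16 becomes 0x03ca0000, and
 * uu[1] >> 32 is 0x00000001. GET_LOCAL_JOBID() keeps the upper 16 bits
 * of the first value and the lower 16 bits of the second, so
 * job_key = 0x03ca0001.
 */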
#endif
int ompi_mtl_mxm_progress(void);
#if MXM_API >= MXM_VERSION(2,0)
static void ompi_mtl_mxm_mem_release_cb(void *buf, size_t length,
void *cbdata, bool from_alloc);
#endif
#if MXM_API < MXM_VERSION(2,0)
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm_ptl_id_t ptlid)
{
size_t addrlen;
mxm_error_t err;
addrlen = sizeof(ep_info->ptl_addr[ptlid]);
err = mxm_ep_address(ompi_mtl_mxm.ep, ptlid,
(struct sockaddr *) &ep_info->ptl_addr[ptlid], &addrlen);
if (MXM_OK != err) {
opal_show_help("help-mtl-mxm.txt", "unable to extract endpoint ptl address",
true, (int)ptlid, mxm_error_string(err));
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
#else
static int ompi_mtl_mxm_get_ep_address(void **address_p, size_t *address_len_p)
{
mxm_error_t err;
*address_len_p = 0;
err = mxm_ep_get_address(ompi_mtl_mxm.ep, NULL, address_len_p);
if (err != MXM_ERR_BUFFER_TOO_SMALL) {
MXM_ERROR("Failed to get ep address length");
return OMPI_ERROR;
}
*address_p = malloc(*address_len_p);
if (*address_p == NULL) {
MXM_ERROR("Failed to allocate ep address buffer");
return OMPI_ERR_OUT_OF_RESOURCE;
}
err = mxm_ep_get_address(ompi_mtl_mxm.ep, *address_p, address_len_p);
if (MXM_OK != err) {
opal_show_help("help-mtl-mxm.txt", "unable to extract endpoint address",
true, mxm_error_string(err));
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
#endif
#define max(a,b) ((a)>(b)?(a):(b))
static mxm_error_t
ompi_mtl_mxm_create_ep(mxm_h ctx, mxm_ep_h *ep, unsigned ptl_bitmap, int lr,
uint32_t jobid, uint64_t mxlr, int nlps)
{
mxm_error_t err;
#if MXM_API < MXM_VERSION(2,0)
ompi_mtl_mxm.mxm_ep_opts->job_id = jobid;
ompi_mtl_mxm.mxm_ep_opts->local_rank = lr;
ompi_mtl_mxm.mxm_ep_opts->num_local_procs = nlps;
err = mxm_ep_create(ctx, ompi_mtl_mxm.mxm_ep_opts, ep);
#else
err = mxm_ep_create(ctx, ompi_mtl_mxm.mxm_ep_opts, ep);
#endif
return err;
}
/*
 * Send information using modex. In some cases there is a limit on the
 * data size (for example ess/pmi), so the address is sent in
 * fixed-size parts.
 */
static int ompi_mtl_mxm_send_ep_address(void *address, size_t address_len)
{
char *modex_component_name = mca_base_component_to_string(&mca_mtl_mxm_component.super.mtl_version);
char *modex_name = malloc(strlen(modex_component_name) + 5);
const size_t modex_max_size = 0x60;
unsigned char *modex_buf_ptr;
size_t modex_buf_size;
size_t modex_cur_size;
int modex_name_id = 0;
int rc;
/* Send address length */
sprintf(modex_name, "%s-len", modex_component_name);
OPAL_MODEX_SEND_STRING(rc, OPAL_PMIX_GLOBAL,
modex_name, &address_len, sizeof(address_len));
if (OMPI_SUCCESS != rc) {
MXM_ERROR("failed to send address length");
goto bail;
}
/* Send address, in parts.
 * The modex name looks like mtl.mxm.1.5-18, where mtl.mxm.1.5 is the component name and 18 is the part index.
 */
modex_buf_size = address_len;
modex_buf_ptr = address;
while (modex_buf_size) {
sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id);
modex_cur_size = (modex_buf_size < modex_max_size) ? modex_buf_size : modex_max_size;
OPAL_MODEX_SEND_STRING(rc, OPAL_PMIX_GLOBAL,
modex_name, modex_buf_ptr, modex_cur_size);
if (OMPI_SUCCESS != rc) {
MXM_ERROR("Open MPI couldn't distribute EP connection details");
goto bail;
}
modex_name_id++;
modex_buf_ptr += modex_cur_size;
modex_buf_size -= modex_cur_size;
}
rc = OMPI_SUCCESS;
bail:
free(modex_component_name);
free(modex_name);
return rc;
}
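/*
 * Illustration (hypothetical sizes): with component string "mtl.mxm.1.5"
 * and a 200-byte endpoint address, the loop above publishes
 * "mtl.mxm.1.5-len" with the total length, then "mtl.mxm.1.5-0" and
 * "mtl.mxm.1.5-1" carrying 0x60 (96) bytes each, and "mtl.mxm.1.5-2"
 * carrying the remaining 8 bytes. The receiver reassembles the parts in
 * order.
 */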
/*
 * Receive information using modex
 */
static int ompi_mtl_mxm_recv_ep_address(ompi_proc_t *source_proc, void **address_p,
size_t *address_len_p)
{
char *modex_component_name = mca_base_component_to_string(&mca_mtl_mxm_component.super.mtl_version);
char *modex_name = malloc(strlen(modex_component_name) + 5);
uint8_t *modex_buf_ptr;
int32_t modex_cur_size;
size_t modex_buf_size;
size_t *address_len_buf_ptr;
int modex_name_id = 0;
int rc;
*address_p = NULL;
*address_len_p = 0;
/* Receive address length */
sprintf(modex_name, "%s-len", modex_component_name);
OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super.proc_name,
(uint8_t **)&address_len_buf_ptr,
&modex_cur_size);
if (OMPI_SUCCESS != rc) {
MXM_ERROR("Failed to receive ep address length");
goto bail;
}
/* Allocate buffer to hold the address */
*address_len_p = *address_len_buf_ptr;
*address_p = malloc(*address_len_p);
if (*address_p == NULL) {
MXM_ERROR("Failed to allocate modex receive buffer");
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto bail;
}
/* Receive the data, in parts */
modex_buf_size = 0;
while (modex_buf_size < *address_len_p) {
sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id);
OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super.proc_name,
&modex_buf_ptr,
&modex_cur_size);
if (OMPI_SUCCESS != rc) {
MXM_ERROR("Open MPI couldn't distribute EP connection details");
free(*address_p);
*address_p = NULL;
*address_len_p = 0;
goto bail;
}
memcpy((char*)(*address_p) + modex_buf_size, modex_buf_ptr, modex_cur_size);
modex_buf_size += modex_cur_size;
modex_name_id++;
}
rc = OMPI_SUCCESS;
bail:
free(modex_component_name);
free(modex_name);
return rc;
}
int ompi_mtl_mxm_module_init(void)
{
#if MXM_API < MXM_VERSION(2,0)
ompi_mtl_mxm_ep_conn_info_t ep_info;
#endif
void *ep_address;
size_t ep_address_len;
mxm_error_t err;
uint32_t jobid;
uint64_t mxlr;
ompi_proc_t **procs;
unsigned ptl_bitmap;
size_t totps, proc;
int lr, nlps;
int rc;
mxlr = 0;
lr = -1;
jobid = 0;
#if MXM_API < MXM_VERSION(2,0)
jobid = ompi_mtl_mxm_get_job_id();
if (0 == jobid) {
MXM_ERROR("Failed to generate jobid");
return OMPI_ERROR;
}
#endif
totps = ompi_proc_world_size ();
if (totps < (size_t)ompi_mtl_mxm.mxm_np) {
MXM_VERBOSE(1, "MXM support will be disabled because the total number "
"of processes (%lu) is less than the minimum set by the "
"mtl_mxm_np MCA parameter (%u)", totps, ompi_mtl_mxm.mxm_np);
return OMPI_ERR_NOT_SUPPORTED;
}
MXM_VERBOSE(1, "MXM support enabled");
if (ORTE_NODE_RANK_INVALID == (lr = ompi_process_info.my_node_rank)) {
MXM_ERROR("Unable to obtain local node rank");
return OMPI_ERROR;
}
nlps = ompi_process_info.num_local_peers + 1;
/* local procs are always allocated. if that ever changes this will need to
* be modified. */
procs = ompi_proc_get_allocated (&totps);
if (NULL == procs) {
MXM_ERROR("Unable to obtain process list");
return OMPI_ERROR;
}
for (proc = 0; proc < totps; proc++) {
if (OPAL_PROC_ON_LOCAL_NODE(procs[proc]->super.proc_flags)) {
mxlr = max(mxlr, procs[proc]->super.proc_name.vpid);
}
}
free(procs);
/* Setup the endpoint options and local addresses to bind to. */
#if MXM_API < MXM_VERSION(2,0)
ptl_bitmap = ompi_mtl_mxm.mxm_ctx_opts->ptl_bitmap;
#else
ptl_bitmap = 0;
#endif
/* Open MXM endpoint */
err = ompi_mtl_mxm_create_ep(ompi_mtl_mxm.mxm_context, &ompi_mtl_mxm.ep,
ptl_bitmap, lr, jobid, mxlr, nlps);
if (MXM_OK != err) {
opal_show_help("help-mtl-mxm.txt", "unable to create endpoint", true,
mxm_error_string(err));
return OMPI_ERROR;
}
/*
* Get address for each PTL on this endpoint, and share it with other ranks.
*/
#if MXM_API < MXM_VERSION(2,0)
if ((ptl_bitmap & MXM_BIT(MXM_PTL_SELF)) &&
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SELF)) {
return OMPI_ERROR;
}
if ((ptl_bitmap & MXM_BIT(MXM_PTL_RDMA)) &&
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_RDMA)) {
return OMPI_ERROR;
}
if ((ptl_bitmap & MXM_BIT(MXM_PTL_SHM)) &&
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SHM)) {
return OMPI_ERROR;
}
ep_address = &ep_info;
ep_address_len = sizeof(ep_info);
#else
rc = ompi_mtl_mxm_get_ep_address(&ep_address, &ep_address_len);
if (OMPI_SUCCESS != rc) {
return rc;
}
#endif
rc = ompi_mtl_mxm_send_ep_address(ep_address, ep_address_len);
if (OMPI_SUCCESS != rc) {
MXM_ERROR("Modex session failed.");
return rc;
}
#if MXM_API >= MXM_VERSION(2,0)
free(ep_address);
#endif
/* Register the MXM progress function */
opal_progress_register(ompi_mtl_mxm_progress);
ompi_mtl_mxm.super.mtl_flags |= MCA_MTL_BASE_FLAG_REQUIRE_WORLD;
#if MXM_API >= MXM_VERSION(2,0)
if (ompi_mtl_mxm.using_mem_hooks) {
opal_mem_hooks_register_release(ompi_mtl_mxm_mem_release_cb, NULL);
}
#endif
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_finalize(struct mca_mtl_base_module_t* mtl)
{
#if MXM_API >= MXM_VERSION(2,0)
if (ompi_mtl_mxm.using_mem_hooks) {
opal_mem_hooks_unregister_release(ompi_mtl_mxm_mem_release_cb);
}
#endif
opal_progress_unregister(ompi_mtl_mxm_progress);
mxm_ep_destroy(ompi_mtl_mxm.ep);
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
struct ompi_proc_t** procs)
{
#if MXM_API < MXM_VERSION(2,0)
ompi_mtl_mxm_ep_conn_info_t *ep_info;
mxm_conn_req_t *conn_reqs;
size_t ep_index = 0;
#endif
void *ep_address = NULL;
size_t ep_address_len;
mxm_error_t err;
size_t i;
int rc;
mca_mtl_mxm_endpoint_t *endpoint;
assert(mtl == &ompi_mtl_mxm.super);
#if MXM_API < MXM_VERSION(2,0)
/* Allocate connection requests */
conn_reqs = calloc(nprocs, sizeof(mxm_conn_req_t));
ep_info = calloc(nprocs, sizeof(ompi_mtl_mxm_ep_conn_info_t));
if (NULL == conn_reqs || NULL == ep_info) {
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto bail;
}
#endif
/* Get the EP connection requests for all the processes from modex */
for (i = 0; i < nprocs; ++i) {
if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) {
continue; /* already connected to this endpoint */
}
rc = ompi_mtl_mxm_recv_ep_address(procs[i], &ep_address, &ep_address_len);
if (rc != OMPI_SUCCESS) {
goto bail;
}
#if MXM_API < MXM_VERSION(2,0)
if (ep_address_len != sizeof(ep_info[i])) {
MXM_ERROR("Invalid endpoint address length");
free(ep_address);
rc = OMPI_ERROR;
goto bail;
}
memcpy(&ep_info[i], ep_address, ep_address_len);
free(ep_address);
conn_reqs[ep_index].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SELF]);
conn_reqs[ep_index].ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SHM]);
conn_reqs[ep_index].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_RDMA]);
ep_index++;
#else
endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t);
endpoint->mtl_mxm_module = &ompi_mtl_mxm;
err = mxm_ep_connect(ompi_mtl_mxm.ep, ep_address, &endpoint->mxm_conn);
free(ep_address);
if (err != MXM_OK) {
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
rc = OMPI_ERROR;
goto bail;
}
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
#endif
}
#if MXM_API < MXM_VERSION(2,0)
/* Connect to remote peers */
err = mxm_ep_connect(ompi_mtl_mxm.ep, conn_reqs, ep_index, -1);
if (MXM_OK != err) {
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
for (i = 0; i < ep_index; ++i) {
if (MXM_OK != conn_reqs[i].error) {
MXM_ERROR("MXM EP connect to %s error: %s\n",
(NULL == procs[i]->super.proc_hostname) ?
"unknown" : procs[i]->proc_hostname,
mxm_error_string(conn_reqs[i].error));
}
}
rc = OMPI_ERROR;
goto bail;
}
/* Save returned connections */
for (i = 0; i < ep_index; ++i) {
endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t);
endpoint->mtl_mxm_module = &ompi_mtl_mxm;
endpoint->mxm_conn = conn_reqs[i].conn;
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
}
#endif
#if MXM_API >= MXM_VERSION(3,1)
if (ompi_mtl_mxm.bulk_connect) {
mxm_ep_wireup(ompi_mtl_mxm.ep);
}
#endif
rc = OMPI_SUCCESS;
bail:
#if MXM_API < MXM_VERSION(2,0)
free(conn_reqs);
free(ep_info);
#endif
return rc;
}
int ompi_mtl_add_single_proc(struct mca_mtl_base_module_t *mtl,
struct ompi_proc_t* procs)
{
void *ep_address = NULL;
size_t ep_address_len;
mxm_error_t err;
int rc;
mca_mtl_mxm_endpoint_t *endpoint;
assert(mtl == &ompi_mtl_mxm.super);
if (NULL != procs->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) {
return OMPI_SUCCESS;
}
rc = ompi_mtl_mxm_recv_ep_address(procs, &ep_address, &ep_address_len);
if (rc != OMPI_SUCCESS) {
return rc;
}
#if MXM_API < MXM_VERSION(2,0)
ompi_mtl_mxm_ep_conn_info_t ep_info;
mxm_conn_req_t conn_req;
if (ep_address_len != sizeof(ep_info)) {
MXM_ERROR("Invalid endpoint address length");
free(ep_address);
return OMPI_ERROR;
}
memcpy(&ep_info, ep_address, ep_address_len);
free(ep_address);
conn_req.ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info.ptl_addr[MXM_PTL_SELF]);
conn_req.ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info.ptl_addr[MXM_PTL_SHM]);
conn_req.ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info.ptl_addr[MXM_PTL_RDMA]);
/* Connect to remote peers */
err = mxm_ep_connect(ompi_mtl_mxm.ep, &conn_req, 1, -1);
if (MXM_OK != err) {
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
if (MXM_OK != conn_req.error) {
MXM_ERROR("MXM EP connect to %s error: %s\n",
(NULL == procs->super.proc_hostname) ?
"unknown" : procs->proc_hostname,
mxm_error_string(conn_reqs.error));
}
return OMPI_ERROR;
}
/* Save returned connections */
endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t);
endpoint->mtl_mxm_module = &ompi_mtl_mxm;
endpoint->mxm_conn = conn_req.conn;
procs->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
#else
endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t);
endpoint->mtl_mxm_module = &ompi_mtl_mxm;
err = mxm_ep_connect(ompi_mtl_mxm.ep, ep_address, &endpoint->mxm_conn);
free(ep_address);
if (err != MXM_OK) {
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
return OMPI_ERROR;
}
procs->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
#endif
#if MXM_API >= MXM_VERSION(3,1)
if (ompi_mtl_mxm.bulk_connect) {
mxm_ep_wireup(ompi_mtl_mxm.ep);
}
#endif
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_del_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
struct ompi_proc_t** procs)
{
size_t i;
#if MXM_API >= MXM_VERSION(3,1)
if (ompi_mtl_mxm.bulk_disconnect && ((int)nprocs) == ompi_proc_world_size ()) {
mxm_ep_powerdown(ompi_mtl_mxm.ep);
}
#endif
/* XXX: Directly accessing the obj_reference_count is an abstraction
* violation of the object system. We know this needs to be fixed, but
* are deferring the fix to a later time as it involves a design issue
* in the way we handle endpoints as objects
*/
for (i = 0; i < nprocs; ++i) {
mca_mtl_mxm_endpoint_t *endpoint = (mca_mtl_mxm_endpoint_t*)
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
if (endpoint) {
mxm_ep_disconnect(endpoint->mxm_conn);
OBJ_RELEASE(endpoint);
}
}
opal_pmix.fence(NULL, 0);
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_add_comm(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm)
{
mxm_error_t err;
mxm_mq_h mq;
assert(mtl == &ompi_mtl_mxm.super);
assert(NULL != ompi_mtl_mxm.mxm_context);
err = mxm_mq_create(ompi_mtl_mxm.mxm_context, comm->c_contextid, &mq);
if (MXM_OK != err) {
opal_show_help("help-mtl-mxm.txt", "mxm mq create", true, mxm_error_string(err));
return OMPI_ERROR;
}
comm->c_pml_comm = (void*)mq;
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_del_comm(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm)
{
assert(mtl == &ompi_mtl_mxm.super);
if (NULL != ompi_mtl_mxm.mxm_context) {
mxm_mq_destroy((mxm_mq_h)comm->c_pml_comm);
}
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_progress(void)
{
mxm_error_t err;
err = mxm_progress(ompi_mtl_mxm.mxm_context);
if ((MXM_OK != err) && (MXM_ERR_NO_PROGRESS != err) ) {
opal_show_help("help-mtl-mxm.txt", "errors during mxm_progress", true, mxm_error_string(err));
}
return 1;
}
#if MXM_API >= MXM_VERSION(2,0)
static void ompi_mtl_mxm_mem_release_cb(void *buf, size_t length,
void *cbdata, bool from_alloc)
{
mxm_mem_unmap(ompi_mtl_mxm.mxm_context, buf, length,
from_alloc ? MXM_MEM_UNMAP_MARK_INVALID : 0);
}
#endif
OBJ_CLASS_INSTANCE(
ompi_mtl_mxm_message_t,
opal_free_list_item_t,
NULL,
NULL);

View file: ompi/mca/mtl/mxm/mtl_mxm.h

@@ -1,117 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MTL_MXM_H_HAS_BEEN_INCLUDED
#define MTL_MXM_H_HAS_BEEN_INCLUDED
#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>
#include <mxm/api/mxm_api.h>
#ifndef MXM_VERSION
#define MXM_VERSION(major, minor) (((major)<<MXM_MAJOR_BIT)|((minor)<<MXM_MINOR_BIT))
#endif
#if MXM_API < MXM_VERSION(1,5)
#error "Unsupported MXM version, version 1.5 or above required"
#endif
#if MXM_API < MXM_VERSION(2,0)
#include <mxm/api/mxm_addr.h>
#endif
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/mtl/mtl.h"
#include "ompi/mca/mtl/base/base.h"
#include "opal/class/opal_free_list.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/datatype/opal_convertor.h"
#include "mtl_mxm_debug.h"
BEGIN_C_DECLS
/* MTL interface functions */
extern int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t* mtl,
size_t nprocs, struct ompi_proc_t** procs);
extern int ompi_mtl_add_single_proc(struct mca_mtl_base_module_t *mtl,
struct ompi_proc_t* procs);
extern int ompi_mtl_mxm_del_procs(struct mca_mtl_base_module_t* mtl,
size_t nprocs, struct ompi_proc_t** procs);
extern int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t* comm, int dest, int tag,
struct opal_convertor_t *convertor,
mca_pml_base_send_mode_t mode);
extern int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t* comm, int dest,
int tag, struct opal_convertor_t *convertor,
mca_pml_base_send_mode_t mode, bool blocking,
mca_mtl_request_t * mtl_request);
extern int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t *comm, int src,
int tag, struct opal_convertor_t *convertor,
struct mca_mtl_request_t *mtl_request);
extern int ompi_mtl_mxm_iprobe(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t *comm, int src,
int tag, int *flag,
struct ompi_status_public_t *status);
extern int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl,
struct mca_mtl_request_t *mtl_request, int flag);
extern int ompi_mtl_mxm_imrecv(struct mca_mtl_base_module_t* mtl,
struct opal_convertor_t *convertor,
struct ompi_message_t **message,
struct mca_mtl_request_t *mtl_request);
extern int ompi_mtl_mxm_improbe(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm,
int src,
int tag,
int *matched,
struct ompi_message_t **message,
struct ompi_status_public_t *status);
extern int ompi_mtl_mxm_add_comm(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm);
extern int ompi_mtl_mxm_del_comm(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm);
extern int ompi_mtl_mxm_finalize(struct mca_mtl_base_module_t* mtl);
int ompi_mtl_mxm_module_init(void);
struct ompi_mtl_mxm_message_t {
opal_free_list_item_t super;
mxm_mq_h mq;
mxm_conn_h conn;
mxm_message_h mxm_msg;
mxm_tag_t tag;
mxm_tag_t tag_mask;
};
typedef struct ompi_mtl_mxm_message_t ompi_mtl_mxm_message_t;
OBJ_CLASS_DECLARATION(ompi_mtl_mxm_message_t);
END_C_DECLS
#endif

View file: ompi/mca/mtl/mxm/mtl_mxm_cancel.c

@@ -1,34 +0,0 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mtl_mxm.h"
#include "mtl_mxm_request.h"
int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl,
struct mca_mtl_request_t *mtl_request, int flag)
{
mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
mxm_error_t err;
#if MXM_API >= MXM_VERSION(2,0)
if (mtl_mxm_request->is_send) {
err = mxm_req_cancel_send(&mtl_mxm_request->mxm.send);
} else {
err = mxm_req_cancel_recv(&mtl_mxm_request->mxm.recv);
}
#else
err = mxm_req_cancel(&mtl_mxm_request->mxm.base);
#endif
if ((err != MXM_OK) && (err != MXM_ERR_NO_PROGRESS)) {
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}

View file: ompi/mca/mtl/mxm/mtl_mxm_component.c

@@ -1,316 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "ompi/proc/proc.h"
#include "opal/memoryhooks/memory.h"
#include "opal/mca/memory/base/base.h"
#include "ompi/runtime/mpiruntime.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_request.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
static int ompi_mtl_mxm_component_open(void);
static int ompi_mtl_mxm_component_query(mca_base_module_t **module, int *priority);
static int ompi_mtl_mxm_component_close(void);
static int ompi_mtl_mxm_component_register(void);
static int param_priority;
int mca_mtl_mxm_output = -1;
static mca_mtl_base_module_t
* ompi_mtl_mxm_component_init(bool enable_progress_threads,
bool enable_mpi_threads);
mca_mtl_mxm_component_t mca_mtl_mxm_component = {
{
/*
* First, the mca_base_component_t struct containing meta
* information about the component itself
*/
.mtl_version = {
MCA_MTL_BASE_VERSION_2_0_0,
.mca_component_name = "mxm",
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION),
.mca_open_component = ompi_mtl_mxm_component_open,
.mca_close_component = ompi_mtl_mxm_component_close,
.mca_query_component = ompi_mtl_mxm_component_query,
.mca_register_component_params = ompi_mtl_mxm_component_register,
},
.mtl_data = {
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
.mtl_init = ompi_mtl_mxm_component_init,
}
};
static int ompi_mtl_mxm_component_register(void)
{
mca_base_component_t*c;
#if MXM_API < MXM_VERSION(3,0)
unsigned long cur_ver;
long major, minor;
char* runtime_version;
#endif
c = &mca_mtl_mxm_component.super.mtl_version;
ompi_mtl_mxm.verbose = 0;
(void) mca_base_component_var_register(c, "verbose",
"Verbose level of the MXM component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL,
&ompi_mtl_mxm.verbose);
#if MXM_API > MXM_VERSION(2,0)
ompi_mtl_mxm.mxm_np = 0;
#else
ompi_mtl_mxm.mxm_np = 128;
#endif
(void) mca_base_component_var_register(c, "np",
"[integer] Minimal number of MPI processes in a single job "
"required to activate the MXM transport",
MCA_BASE_VAR_TYPE_INT, NULL,0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mtl_mxm.mxm_np);
ompi_mtl_mxm.compiletime_version = MXM_VERNO_STRING;
(void) mca_base_component_var_register(c,
MCA_COMPILETIME_VER,
"Version of the libmxm library with which Open MPI was compiled",
MCA_BASE_VAR_TYPE_VERSION_STRING,
NULL, 0, 0,
OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mtl_mxm.compiletime_version);
#if MXM_API >= MXM_VERSION(3,0)
ompi_mtl_mxm.runtime_version = (char *)mxm_get_version_string();
#else
cur_ver = mxm_get_version();
major = (cur_ver >> MXM_MAJOR_BIT) & 0xff;
minor = (cur_ver >> MXM_MINOR_BIT) & 0xff;
asprintf(&runtime_version, "%ld.%ld", major, minor);
ompi_mtl_mxm.runtime_version = runtime_version;
#endif
(void) mca_base_component_var_register(c,
MCA_RUNTIME_VER,
"Version of the libmxm library with which Open MPI is running",
MCA_BASE_VAR_TYPE_VERSION_STRING,
NULL, 0, 0,
OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mtl_mxm.runtime_version);
#if MXM_API < MXM_VERSION(3,0)
free(runtime_version);
#endif
/* set high enough to defeat ob1's default */
param_priority = 30;
(void) mca_base_component_var_register (c,
"priority", "Priority of the MXM MTL component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&param_priority);
#if MXM_API >= MXM_VERSION(3,1)
{
unsigned long cur_ver = mxm_get_version();
ompi_mtl_mxm.bulk_connect = 0;
if (cur_ver < MXM_VERSION(3,2)) {
ompi_mtl_mxm.bulk_disconnect = 0;
} else {
ompi_mtl_mxm.bulk_disconnect = 1;
}
(void) mca_base_component_var_register(c, "bulk_connect",
"[integer] use bulk connect",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mtl_mxm.bulk_connect);
(void) mca_base_component_var_register(c, "bulk_disconnect",
"[integer] use bulk disconnect",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mtl_mxm.bulk_disconnect);
if (cur_ver < MXM_VERSION(3,2) &&
(ompi_mtl_mxm.bulk_connect || ompi_mtl_mxm.bulk_disconnect)) {
ompi_mtl_mxm.bulk_connect = 0;
ompi_mtl_mxm.bulk_disconnect = 0;
MXM_VERBOSE(1, "WARNING: OMPI is running with MXM version %s, which is less "
"than 3.2, so bulk connect/disconnect cannot work properly and will be turned off.",
ompi_mtl_mxm.runtime_version);
}
}
#endif
return OMPI_SUCCESS;
}
static int ompi_mtl_mxm_component_open(void)
{
mxm_error_t err;
unsigned long cur_ver;
int rc;
mca_mtl_mxm_output = opal_output_open(NULL);
opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose);
cur_ver = mxm_get_version();
if (cur_ver != MXM_API) {
MXM_VERBOSE(1,
"WARNING: OMPI was compiled with MXM version %d.%d but version %ld.%ld detected.",
MXM_VERNO_MAJOR,
MXM_VERNO_MINOR,
(cur_ver >> MXM_MAJOR_BIT) & 0xff,
(cur_ver >> MXM_MINOR_BIT) & 0xff);
}
#if MXM_API >= MXM_VERSION(2,0)
(void)mca_base_framework_open(&opal_memory_base_framework, 0);
/* Register memory hooks */
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) &
opal_mem_hooks_support_level()))
{
setenv("MXM_MPI_MEM_ON_DEMAND_MAP", "y", 0);
MXM_VERBOSE(1, "Enabling on-demand memory mapping");
ompi_mtl_mxm.using_mem_hooks = 1;
} else {
MXM_VERBOSE(1, "Disabling on-demand memory mapping");
ompi_mtl_mxm.using_mem_hooks = 0;
}
setenv("MXM_MPI_SINGLE_THREAD", ompi_mpi_thread_multiple ? "n" : "y" , 0);
#endif
#if MXM_API >= MXM_VERSION(2,1)
if (MXM_OK != mxm_config_read_opts(&ompi_mtl_mxm.mxm_ctx_opts,
&ompi_mtl_mxm.mxm_ep_opts,
"MPI", NULL, 0))
#else
if ((MXM_OK != mxm_config_read_context_opts(&ompi_mtl_mxm.mxm_ctx_opts)) ||
(MXM_OK != mxm_config_read_ep_opts(&ompi_mtl_mxm.mxm_ep_opts)))
#endif
{
MXM_ERROR("Failed to parse MXM configuration");
return OPAL_ERR_BAD_PARAM;
}
err = mxm_init(ompi_mtl_mxm.mxm_ctx_opts, &ompi_mtl_mxm.mxm_context);
MXM_VERBOSE(1, "mxm component open");
if (MXM_OK != err) {
if (MXM_ERR_NO_DEVICE == err) {
MXM_VERBOSE(1, "No supported device found, disqualifying mxm");
} else {
opal_show_help("help-mtl-mxm.txt", "mxm init", true,
mxm_error_string(err));
}
return OPAL_ERR_NOT_AVAILABLE;
}
OBJ_CONSTRUCT(&mca_mtl_mxm_component.mxm_messages, opal_free_list_t);
rc = opal_free_list_init (&mca_mtl_mxm_component.mxm_messages,
sizeof(ompi_mtl_mxm_message_t),
opal_cache_line_size,
OBJ_CLASS(ompi_mtl_mxm_message_t),
0, opal_cache_line_size,
32 /* free list num */,
-1 /* free list max */,
32 /* free list inc */,
NULL, 0, NULL, NULL, NULL);
if (OMPI_SUCCESS != rc) {
opal_show_help("help-mtl-mxm.txt", "mxm init", true,
mxm_error_string(err));
return OPAL_ERR_NOT_AVAILABLE;
}
return OMPI_SUCCESS;
}
static int ompi_mtl_mxm_component_query(mca_base_module_t **module, int *priority)
{
/*
* if we get here it means that mxm is available so give high priority
*/
ompi_mpi_dynamics_disable("the MXM MTL does not support MPI dynamic process functionality");
*priority = param_priority;
*module = (mca_base_module_t *)&ompi_mtl_mxm.super;
return OMPI_SUCCESS;
}
static int ompi_mtl_mxm_component_close(void)
{
if (ompi_mtl_mxm.mxm_context != NULL) {
mxm_cleanup(ompi_mtl_mxm.mxm_context);
ompi_mtl_mxm.mxm_context = NULL;
OBJ_DESTRUCT(&mca_mtl_mxm_component.mxm_messages);
#if MXM_API >= MXM_VERSION(2,0)
mxm_config_free_ep_opts(ompi_mtl_mxm.mxm_ep_opts);
mxm_config_free_context_opts(ompi_mtl_mxm.mxm_ctx_opts);
mca_base_framework_close(&opal_memory_base_framework);
#else
mxm_config_free(ompi_mtl_mxm.mxm_ep_opts);
mxm_config_free(ompi_mtl_mxm.mxm_ctx_opts);
#endif
}
return OMPI_SUCCESS;
}
static mca_mtl_base_module_t*
ompi_mtl_mxm_component_init(bool enable_progress_threads,
bool enable_mpi_threads)
{
int rc;
rc = ompi_mtl_mxm_module_init();
if (OMPI_SUCCESS != rc) {
return NULL;
}
/* Calculate MTL constraints according to MXM types */
ompi_mtl_mxm.super.mtl_max_contextid = 1UL << (sizeof(mxm_ctxid_t) * 8);
ompi_mtl_mxm.super.mtl_max_tag = 1UL << (sizeof(mxm_tag_t) * 8 - 2);
ompi_mtl_mxm.super.mtl_request_size =
sizeof(mca_mtl_mxm_request_t) - sizeof(struct mca_mtl_request_t);
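/*
 * For example, assuming mxm_ctxid_t is 16 bits and mxm_tag_t is 32 bits,
 * this yields mtl_max_contextid = 1 << 16 = 65536 context ids and
 * mtl_max_tag = 1 << 30, keeping the top two tag bits out of the
 * application tag range.
 */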
return &ompi_mtl_mxm.super;
}

View file: ompi/mca/mtl/mxm/mtl_mxm_debug.h

@@ -1,34 +0,0 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MTL_MXM_DEBUG_H
#define MTL_MXM_DEBUG_H
#pragma GCC system_header
#ifdef __BASE_FILE__
#define __MXM_FILE__ __BASE_FILE__
#else
#define __MXM_FILE__ __FILE__
#endif
#define MXM_VERBOSE(level, format, ...) \
opal_output_verbose(level, mca_mtl_mxm_output, "%s:%d - %s() " format, \
__MXM_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
#define MXM_ERROR(format, ... ) \
opal_output_verbose(0, mca_mtl_mxm_output, "Error: %s:%d - %s() " format, \
__MXM_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
#define MXM_MODULE_VERBOSE(mxm_module, level, format, ...) \
MXM_VERBOSE(level, "[%d] " format, (mxm_module)->rank, ## __VA_ARGS__)
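/*
 * Example use (illustrative): MXM_VERBOSE(1, "connected to %d peers", n)
 * expands to an opal_output_verbose() call prefixed with the file, line,
 * and function name, and is printed only when the mtl_mxm_verbose MCA
 * parameter is at least 1.
 */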
extern int mca_mtl_mxm_output;
#endif

View file: ompi/mca/mtl/mxm/mtl_mxm_endpoint.c

@@ -1,42 +0,0 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <sys/time.h>
#include <time.h>
#include "ompi/types.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_endpoint.h"
/*
* Initialize state of the endpoint instance.
*
*/
static void mca_mtl_mxm_endpoint_construct(mca_mtl_mxm_endpoint_t* endpoint)
{
endpoint->mtl_mxm_module = NULL;
}
/*
* Destroy an endpoint
*
*/
static void mca_mtl_mxm_endpoint_destruct(mca_mtl_mxm_endpoint_t* endpoint)
{
}
OBJ_CLASS_INSTANCE(
mca_mtl_mxm_endpoint_t,
opal_list_item_t,
mca_mtl_mxm_endpoint_construct,
mca_mtl_mxm_endpoint_destruct);

View file: ompi/mca/mtl/mxm/mtl_mxm_endpoint.h

@@ -1,41 +0,0 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_MTL_MXM_ENDPOINT_H
#define MCA_MTL_MXM_ENDPOINT_H
#include "opal/class/opal_list.h"
#include "ompi/mca/mtl/mtl.h"
#include "mtl_mxm.h"
BEGIN_C_DECLS
OBJ_CLASS_DECLARATION(mca_mtl_mxm_endpoint_t);
/**
* An abstraction that represents a connection to an endpoint process.
* An instance of mca_mtl_mxm_endpoint_t is associated with each process
* and MTL pair at startup. However, connections to the endpoint
* are established dynamically on an as-needed basis.
*/
struct mca_mtl_mxm_endpoint_t {
opal_list_item_t super;
struct mca_mtl_mxm_module_t* mtl_mxm_module;
/**< MTL instance that created this connection */
mxm_conn_h mxm_conn;
/**< MXM Connection handle*/
};
typedef struct mca_mtl_mxm_endpoint_t mca_mtl_mxm_endpoint_t;
END_C_DECLS
#endif

View file: ompi/mca/mtl/mxm/mtl_mxm_probe.c

@@ -1,115 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "ompi/message/message.h"
#include "ompi/communicator/communicator.h"
int ompi_mtl_mxm_iprobe(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t *comm, int src, int tag,
int *flag, struct ompi_status_public_t *status)
{
mxm_error_t err;
mxm_recv_req_t req;
req.base.state = MXM_REQ_NEW;
ompi_mtl_mxm_set_recv_envelope(&req, comm, src, tag);
err = mxm_req_probe(&req);
if (MXM_OK == err) {
*flag = 1;
if (MPI_STATUS_IGNORE != status) {
ompi_mtl_mxm_to_mpi_status(err, status);
status->MPI_SOURCE = req.completion.sender_imm;
status->MPI_TAG = req.completion.sender_tag;
status->_ucount = req.completion.sender_len;
}
return OMPI_SUCCESS;
} else if (MXM_ERR_NO_MESSAGE == err) {
*flag = 0;
return OMPI_SUCCESS;
} else {
return OMPI_ERROR;
}
}
int ompi_mtl_mxm_improbe(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm,
int src,
int tag,
int *matched,
struct ompi_message_t **message,
struct ompi_status_public_t *status)
{
mxm_error_t err;
mxm_recv_req_t req;
opal_free_list_item_t *item;
ompi_mtl_mxm_message_t *msgp;
item = opal_free_list_wait (&mca_mtl_mxm_component.mxm_messages);
if (OPAL_UNLIKELY(NULL == item)) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
msgp = (ompi_mtl_mxm_message_t *) item;
req.base.state = MXM_REQ_NEW;
ompi_mtl_mxm_set_recv_envelope(&req, comm, src, tag);
msgp->mq = req.base.mq;
msgp->conn = req.base.conn;
msgp->tag = req.tag;
msgp->tag_mask = req.tag_mask;
err = mxm_req_mprobe(&req, &msgp->mxm_msg);
if (MXM_OK == err) {
/* A message was matched; fill in the status only if one was requested. */
*matched = 1;
if (MPI_STATUS_IGNORE != status) {
ompi_mtl_mxm_to_mpi_status(err, status);
status->MPI_SOURCE = req.completion.sender_imm;
status->MPI_TAG = req.completion.sender_tag;
status->_ucount = req.completion.sender_len;
}
} else if (MXM_ERR_NO_MESSAGE == err) {
/* Return the unused message item to the free list before bailing out. */
opal_free_list_return (&mca_mtl_mxm_component.mxm_messages, item);
*matched = 0;
*message = MPI_MESSAGE_NULL;
return OMPI_SUCCESS;
} else {
opal_free_list_return (&mca_mtl_mxm_component.mxm_messages, item);
return OMPI_ERROR;
}
(*message) = ompi_message_alloc();
if (OPAL_UNLIKELY(NULL == (*message))) {
*matched = 0;
*message = MPI_MESSAGE_NULL;
return OMPI_ERR_OUT_OF_RESOURCE;
}
(*message)->comm = comm;
(*message)->req_ptr = msgp;
(*message)->peer = req.completion.sender_imm;
(*message)->count = req.completion.sender_len;
return OMPI_SUCCESS;
}

View file: ompi/mca/mtl/mxm/mtl_mxm_recv.c

@@ -1,197 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/message/message.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
#include "opal/util/show_help.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_request.h"
static void ompi_mtl_mxm_recv_completion_cb(void *context)
{
mca_mtl_mxm_request_t *req = (mca_mtl_mxm_request_t *) context;
struct ompi_request_t *ompi_req = req->super.ompi_req;
mxm_recv_req_t *mxm_recv_req = &req->mxm.recv;
/* Set completion status and envelope */
ompi_mtl_mxm_to_mpi_status(mxm_recv_req->base.error, &ompi_req->req_status);
ompi_req->req_status.MPI_TAG = mxm_recv_req->completion.sender_tag;
ompi_req->req_status.MPI_SOURCE = mxm_recv_req->completion.sender_imm;
ompi_req->req_status._ucount = mxm_recv_req->completion.actual_len;
req->super.completion_callback(&req->super);
}
static size_t ompi_mtl_mxm_stream_unpack(void *buffer, size_t length,
size_t offset, void *context)
{
struct iovec iov;
uint32_t iov_count = 1;
mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *) context;
opal_convertor_t *convertor = mtl_mxm_request->convertor;
iov.iov_len = length;
iov.iov_base = buffer;
opal_convertor_set_position(convertor, &offset);
opal_convertor_unpack(convertor, &iov, &iov_count, &length);
return length;
}
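/*
 * Note: MXM supplies a (buffer, length, offset) tuple for each chunk of
 * the stream; the opal_convertor_set_position() call above repositions
 * the convertor accordingly, so chunks need not be unpacked strictly in
 * order.
 */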
static inline __opal_attribute_always_inline__ int
ompi_mtl_mxm_choose_recv_datatype(mca_mtl_mxm_request_t *mtl_mxm_request)
{
void **buffer = &mtl_mxm_request->buf;
size_t *buffer_len = &mtl_mxm_request->length;
mxm_recv_req_t *mxm_recv_req = &mtl_mxm_request->mxm.recv;
opal_convertor_t *convertor = mtl_mxm_request->convertor;
opal_convertor_get_packed_size(convertor, buffer_len);
if (0 == *buffer_len) {
*buffer = NULL;
*buffer_len = 0;
mxm_recv_req->base.data_type = MXM_REQ_DATA_BUFFER;
return OMPI_SUCCESS;
}
if (opal_convertor_need_buffers(convertor)) {
mxm_recv_req->base.data_type = MXM_REQ_DATA_STREAM;
mxm_recv_req->base.data.stream.length = *buffer_len;
mxm_recv_req->base.data.stream.cb = ompi_mtl_mxm_stream_unpack;
return OMPI_SUCCESS;
}
mxm_recv_req->base.data_type = MXM_REQ_DATA_BUFFER;
*buffer = convertor->pBaseBuf +
convertor->use_desc->desc[convertor->use_desc->used].end_loop.first_elem_disp;
mxm_recv_req->base.data.buffer.ptr = *buffer;
mxm_recv_req->base.data.buffer.length = *buffer_len;
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__ int
ompi_mtl_mxm_recv_init(mca_mtl_mxm_request_t *mtl_mxm_request,
opal_convertor_t *convertor,
mxm_recv_req_t *mxm_recv_req)
{
int ret;
mtl_mxm_request->convertor = convertor;
ret = ompi_mtl_mxm_choose_recv_datatype(mtl_mxm_request);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
#if MXM_API >= MXM_VERSION(2,0)
mtl_mxm_request->is_send = 0;
#endif
mxm_recv_req->base.state = MXM_REQ_NEW;
#if MXM_API < MXM_VERSION(2,0)
mxm_recv_req->base.flags = 0;
#endif
mxm_recv_req->base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
mxm_recv_req->base.context = mtl_mxm_request;
mxm_recv_req->base.completed_cb = ompi_mtl_mxm_recv_completion_cb;
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t *comm, int src, int tag,
struct opal_convertor_t *convertor,
struct mca_mtl_request_t *mtl_request)
{
int ret;
mxm_error_t err;
mxm_recv_req_t *mxm_recv_req;
mca_mtl_mxm_request_t *mtl_mxm_request;
mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
mxm_recv_req = &mtl_mxm_request->mxm.recv;
ompi_mtl_mxm_set_recv_envelope(mxm_recv_req, comm, src, tag);
/* prepare a receive request embedded in the MTL request */
ret = ompi_mtl_mxm_recv_init(mtl_mxm_request, convertor, mxm_recv_req);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
/* post-recv */
err = mxm_req_recv(mxm_recv_req);
if (OPAL_UNLIKELY(MXM_OK != err)) {
opal_show_help("help-mtl-mxm.txt", "error posting receive", true,
mxm_error_string(err), mtl_mxm_request->buf, mtl_mxm_request->length);
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_imrecv(struct mca_mtl_base_module_t* mtl,
struct opal_convertor_t *convertor,
struct ompi_message_t **message,
struct mca_mtl_request_t *mtl_request)
{
int ret;
mxm_error_t err;
mxm_recv_req_t *mxm_recv_req;
mca_mtl_mxm_request_t *mtl_mxm_request;
ompi_mtl_mxm_message_t *msgp =
(ompi_mtl_mxm_message_t *) (*message)->req_ptr;
mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
mxm_recv_req = &mtl_mxm_request->mxm.recv;
/* prepare a receive request embedded in the MTL request */
ret = ompi_mtl_mxm_recv_init(mtl_mxm_request, convertor, mxm_recv_req);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
mxm_recv_req->tag = msgp->tag;
mxm_recv_req->tag_mask = msgp->tag_mask;
mxm_recv_req->base.mq = msgp->mq;
mxm_recv_req->base.conn = msgp->conn;
err = mxm_message_recv(mxm_recv_req, msgp->mxm_msg);
if (OPAL_UNLIKELY(MXM_OK != err)) {
opal_show_help("help-mtl-mxm.txt", "error posting message receive", true,
mxm_error_string(err), mtl_mxm_request->buf, mtl_mxm_request->length);
return OMPI_ERROR;
}
opal_free_list_return (&mca_mtl_mxm_component.mxm_messages, (opal_free_list_item_t *) msgp);
ompi_message_return(*message);
(*message) = MPI_MESSAGE_NULL;
return OMPI_SUCCESS;
}

View file: ompi/mca/mtl/mxm/mtl_mxm_request.h

@@ -1,35 +0,0 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_MTL_MXM_REQUEST_H
#define OMPI_MTL_MXM_REQUEST_H
#include "opal/datatype/opal_convertor.h"
#include "mtl_mxm.h"
struct mca_mtl_mxm_request_t {
struct mca_mtl_request_t super;
union {
mxm_req_base_t base;
mxm_send_req_t send;
mxm_recv_req_t recv;
} mxm;
#if MXM_API >= MXM_VERSION(2,0)
int is_send;
#endif
/* mxm_segment_t mxm_segment[1]; */
void *buf;
size_t length;
struct opal_convertor_t *convertor;
bool free_after;
};
typedef struct mca_mtl_mxm_request_t mca_mtl_mxm_request_t;
#endif

View file: ompi/mca/mtl/mxm/mtl_mxm_send.c

@@ -1,238 +0,0 @@
/*
 * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mca/pml/pml.h"
#include "opal/datatype/opal_convertor.h"
#include "opal/util/show_help.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_request.h"
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
static inline __opal_attribute_always_inline__
size_t ompi_mtl_mxm_stream_pack(opal_convertor_t *convertor, void *buffer,
size_t length, size_t offset)
{
struct iovec iov;
uint32_t iov_count = 1;
iov.iov_len = length;
iov.iov_base = buffer;
opal_convertor_set_position(convertor, &offset);
opal_convertor_pack(convertor, &iov, &iov_count, &length);
return length;
}
static size_t ompi_mtl_mxm_stream_isend(void *buffer, size_t length, size_t offset, void *context)
{
mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *) context;
opal_convertor_t *convertor = mtl_mxm_request->convertor;
return ompi_mtl_mxm_stream_pack(convertor, buffer, length, offset);
}
static size_t ompi_mtl_mxm_stream_send(void *buffer, size_t length, size_t offset, void *context)
{
opal_convertor_t *convertor = (opal_convertor_t *) context;
return ompi_mtl_mxm_stream_pack(convertor, buffer, length, offset);
}
static inline __opal_attribute_always_inline__ int
ompi_mtl_mxm_choose_send_datatype(mxm_send_req_t *mxm_send_req,
opal_convertor_t *convertor,
mxm_stream_cb_t stream_cb)
{
struct iovec iov;
uint32_t iov_count = 1;
size_t *buffer_len = &mxm_send_req->base.data.buffer.length;
#if !(OPAL_ENABLE_HETEROGENEOUS_SUPPORT)
if (convertor->pDesc &&
opal_datatype_is_contiguous_memory_layout(convertor->pDesc,
convertor->count)) {
mxm_send_req->base.data.buffer.ptr = convertor->pBaseBuf;
mxm_send_req->base.data.buffer.length = convertor->local_size;
mxm_send_req->base.data_type = MXM_REQ_DATA_BUFFER;
return OMPI_SUCCESS;
}
#endif
opal_convertor_get_packed_size(convertor, buffer_len);
if (0 == *buffer_len) {
mxm_send_req->base.data.buffer.ptr = NULL;
mxm_send_req->base.data_type = MXM_REQ_DATA_BUFFER;
return OMPI_SUCCESS;
}
if (opal_convertor_need_buffers(convertor)) {
mxm_send_req->base.data_type = MXM_REQ_DATA_STREAM;
mxm_send_req->base.data.stream.length = *buffer_len;
mxm_send_req->base.data.stream.cb = stream_cb;
return OMPI_SUCCESS;
}
mxm_send_req->base.data_type = MXM_REQ_DATA_BUFFER;
iov.iov_base = NULL;
iov.iov_len = *buffer_len;
opal_convertor_pack(convertor, &iov, &iov_count, buffer_len);
mxm_send_req->base.data.buffer.ptr = iov.iov_base;
return OMPI_SUCCESS;
}
static void ompi_mtl_mxm_send_completion_cb(void *context)
{
mca_mtl_mxm_request_t *mtl_mxm_request = context;
ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm.base.error,
&mtl_mxm_request->super.ompi_req->req_status);
mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
}
static void ompi_mtl_mxm_send_progress_cb(void *user_data)
{
opal_progress();
}
int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t* comm, int dest, int tag,
struct opal_convertor_t *convertor,
mca_pml_base_send_mode_t mode)
{
mxm_send_req_t mxm_send_req;
mxm_wait_t wait;
mxm_error_t err;
int ret;
/* prepare local send request */
mxm_send_req.base.state = MXM_REQ_NEW;
mxm_send_req.base.mq = ompi_mtl_mxm_mq_lookup(comm);
mxm_send_req.base.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
mxm_send_req.base.context = convertor;
mxm_send_req.base.completed_cb = NULL;
ret = ompi_mtl_mxm_choose_send_datatype(&mxm_send_req, convertor,
ompi_mtl_mxm_stream_send);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
mxm_send_req.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
mxm_send_req.op.send.tag = tag;
mxm_send_req.op.send.imm_data = ompi_comm_rank(comm);
#if MXM_API < MXM_VERSION(2,0)
mxm_send_req.base.flags = MXM_REQ_FLAG_BLOCKING;
mxm_send_req.opcode = MXM_REQ_OP_SEND;
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
mxm_send_req.base.flags |= MXM_REQ_FLAG_SEND_SYNC;
}
#else
mxm_send_req.flags = MXM_REQ_SEND_FLAG_BLOCKING;
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
mxm_send_req.opcode = MXM_REQ_OP_SEND_SYNC;
} else {
mxm_send_req.opcode = MXM_REQ_OP_SEND;
}
#endif
/* post-send */
err = mxm_req_send(&mxm_send_req);
if (MXM_OK != err) {
opal_show_help("help-mtl-mxm.txt", "error posting send", true, 0, mxm_error_string(err));
return OMPI_ERROR;
}
/* wait for request completion */
wait.req = &mxm_send_req.base;
wait.state = MXM_REQ_COMPLETED;
wait.progress_cb = ompi_mtl_mxm_send_progress_cb;
wait.progress_arg = NULL;
mxm_wait(&wait);
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t* comm, int dest, int tag,
struct opal_convertor_t *convertor,
mca_pml_base_send_mode_t mode, bool blocking,
mca_mtl_request_t * mtl_request)
{
mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *) mtl_request;
mxm_send_req_t *mxm_send_req;
mxm_error_t err;
int ret;
assert(mtl == &ompi_mtl_mxm.super);
mtl_mxm_request->convertor = convertor;
mxm_send_req = &mtl_mxm_request->mxm.send;
#if MXM_API >= MXM_VERSION(2,0)
mtl_mxm_request->is_send = 1;
#endif
/* prepare a send request embedded in the MTL request */
mxm_send_req->base.state = MXM_REQ_NEW;
mxm_send_req->base.mq = ompi_mtl_mxm_mq_lookup(comm);
mxm_send_req->base.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
ret = ompi_mtl_mxm_choose_send_datatype(mxm_send_req, convertor,
ompi_mtl_mxm_stream_isend);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
mtl_mxm_request->buf = mxm_send_req->base.data.buffer.ptr;
mtl_mxm_request->length = mxm_send_req->base.data.buffer.length;
mxm_send_req->base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
mxm_send_req->base.context = mtl_mxm_request;
mxm_send_req->base.completed_cb = ompi_mtl_mxm_send_completion_cb;
#if MXM_API < MXM_VERSION(2,0)
mxm_send_req->base.flags = 0;
mxm_send_req->opcode = MXM_REQ_OP_SEND;
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
mxm_send_req->base.flags |= MXM_REQ_FLAG_SEND_SYNC;
}
#else
#if defined(MXM_REQ_SEND_FLAG_REENTRANT)
mxm_send_req->flags = MXM_REQ_SEND_FLAG_REENTRANT;
#else
mxm_send_req->flags = 0;
#endif
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
mxm_send_req->opcode = MXM_REQ_OP_SEND_SYNC;
} else {
mxm_send_req->opcode = MXM_REQ_OP_SEND;
}
#endif
mxm_send_req->op.send.tag = tag;
mxm_send_req->op.send.imm_data = ompi_comm_rank(comm);
/* post-send */
err = mxm_req_send(mxm_send_req);
if (MXM_OK != err) {
opal_show_help("help-mtl-mxm.txt", "error posting send", true, 1, mxm_error_string(err));
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}

View file: ompi/mca/mtl/mxm/mtl_mxm_types.h

@@ -1,123 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MTL_MXM_TYPES_H_HAS_BEEN_INCLUDED
#define MTL_MXM_TYPES_H_HAS_BEEN_INCLUDED
#include "ompi_config.h"
#include "mtl_mxm.h"
#include "ompi/mca/mtl/mtl.h"
#include "ompi/mca/mtl/base/base.h"
#include "ompi/communicator/communicator.h"
#include "mtl_mxm_endpoint.h"
BEGIN_C_DECLS
/**
* MTL Module Interface
*/
typedef struct mca_mtl_mxm_module_t {
mca_mtl_base_module_t super; /**< base MTL interface */
int verbose;
int mxm_np;
mxm_h mxm_context;
mxm_ep_h ep;
mxm_context_opts_t *mxm_ctx_opts;
mxm_ep_opts_t *mxm_ep_opts;
#if MXM_API >= MXM_VERSION(2,0)
int using_mem_hooks;
#endif
#if MXM_API >= MXM_VERSION(3,1)
int bulk_connect; /* use bulk connect */
int bulk_disconnect; /* use bulk disconnect */
#endif
char* runtime_version;
char* compiletime_version;
} mca_mtl_mxm_module_t;
#if MXM_API < MXM_VERSION(2,0)
typedef struct ompi_mtl_mxm_ep_conn_info_t {
struct sockaddr_storage ptl_addr[MXM_PTL_LAST];
} ompi_mtl_mxm_ep_conn_info_t;
#endif
extern mca_mtl_mxm_module_t ompi_mtl_mxm;
typedef struct mca_mtl_mxm_component_t {
mca_mtl_base_component_2_0_0_t super; /**< base MTL component */
opal_free_list_t mxm_messages; /* will be used for MPI_Mprobe and MPI_Mrecv calls */
} mca_mtl_mxm_component_t;
OMPI_DECLSPEC mca_mtl_mxm_component_t mca_mtl_mxm_component;
static inline mxm_conn_h ompi_mtl_mxm_conn_lookup(struct ompi_communicator_t* comm, int rank) {
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup(comm, rank);
mca_mtl_mxm_endpoint_t *endpoint = (mca_mtl_mxm_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
if (endpoint != NULL) {
return endpoint->mxm_conn;
}
MXM_VERBOSE(80, "First communication with [%s:%s]: set endpoint connection.",
ompi_proc->super.proc_hostname, OPAL_NAME_PRINT(ompi_proc->super.proc_name));
ompi_mtl_add_single_proc(ompi_mtl, ompi_proc);
endpoint = (mca_mtl_mxm_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
return endpoint->mxm_conn;
}
static inline mxm_mq_h ompi_mtl_mxm_mq_lookup(struct ompi_communicator_t* comm) {
return (mxm_mq_h)comm->c_pml_comm;
}
static inline void ompi_mtl_mxm_to_mpi_status(mxm_error_t status, ompi_status_public_t *ompi_status) {
switch (status) {
case MXM_OK:
ompi_status->MPI_ERROR = OMPI_SUCCESS;
break;
case MXM_ERR_CANCELED:
ompi_status->_cancelled = true;
break;
case MXM_ERR_MESSAGE_TRUNCATED:
ompi_status->MPI_ERROR = MPI_ERR_TRUNCATE;
break;
default:
ompi_status->MPI_ERROR = MPI_ERR_INTERN;
break;
}
}
static inline void ompi_mtl_mxm_set_recv_envelope(mxm_recv_req_t *req,
struct ompi_communicator_t *comm,
int src, int tag) {
req->base.mq = (mxm_mq_h)comm->c_pml_comm;
req->base.conn = (src == MPI_ANY_SOURCE)
? NULL
: ompi_mtl_mxm_conn_lookup(comm, src);
if (tag == MPI_ANY_TAG) {
req->tag = 0;
req->tag_mask = 0x80000000U; /* MPI_ANY_TAG should not match against negative tags */
} else {
req->tag = tag;
req->tag_mask = 0xffffffffU;
}
}
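/*
 * Matching sketch (assuming the usual XOR/mask tag-match rule): with
 * tag = 0 and tag_mask = 0x80000000, an incoming tag t matches when
 * ((t ^ 0) & 0x80000000) == 0, i.e. whenever its sign bit is clear, so
 * MPI_ANY_TAG matches every non-negative application tag while still
 * excluding negative tags reserved for internal use.
 */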
END_C_DECLS
#endif

View file: ompi/mca/mtl/mxm/owner.txt

@@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: MELLANOX
status: active