Merge pull request #6879 from mwheinz/REF6877-master
PSM MTL is obsolete and should be removed
Этот коммит содержится в:
Коммит
ae1f7e0c3b
@ -1,92 +0,0 @@
|
|||||||
dnl -*- shell-script -*-
|
|
||||||
dnl
|
|
||||||
dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
dnl University Research and Technology
|
|
||||||
dnl Corporation. All rights reserved.
|
|
||||||
dnl Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
dnl of Tennessee Research Foundation. All rights
|
|
||||||
dnl reserved.
|
|
||||||
dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
dnl University of Stuttgart. All rights reserved.
|
|
||||||
dnl Copyright (c) 2004-2006 The Regents of the University of California.
|
|
||||||
dnl All rights reserved.
|
|
||||||
dnl Copyright (c) 2006 QLogic Corp. All rights reserved.
|
|
||||||
dnl Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
|
|
||||||
dnl Copyright (c) 2015 Research Organization for Information Science
|
|
||||||
dnl and Technology (RIST). All rights reserved.
|
|
||||||
dnl Copyright (c) 2016 Los Alamos National Security, LLC. All rights
|
|
||||||
dnl reserved.
|
|
||||||
dnl Copyright (c) 2016 Intel Corporation. All rights reserved.
|
|
||||||
dnl
|
|
||||||
dnl $COPYRIGHT$
|
|
||||||
dnl
|
|
||||||
dnl Additional copyrights may follow
|
|
||||||
dnl
|
|
||||||
dnl $HEADER$
|
|
||||||
dnl
|
|
||||||
|
|
||||||
# OMPI_CHECK_PSM(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
# Check whether PSM support can be found.  The probe itself runs only
# once per configure run (guarded by $ompi_check_psm_happy); every
# invocation then appends the cached flags to prefix_{CPPFLAGS,
# LDFLAGS, LIBS} and runs action-if-found if there is support,
# otherwise executes action-if-not-found.  Configure aborts when PSM
# was explicitly requested with --with-psm but could not be used.
AC_DEFUN([OMPI_CHECK_PSM],[
    if test -z "$ompi_check_psm_happy" ; then
        AC_ARG_WITH([psm],
            [AS_HELP_STRING([--with-psm(=DIR)],
                [Build PSM (Qlogic InfiniPath) support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])])
        OPAL_CHECK_WITHDIR([psm], [$with_psm], [include/psm.h])
        AC_ARG_WITH([psm-libdir],
            [AS_HELP_STRING([--with-psm-libdir=DIR],
                [Search for PSM (QLogic InfiniPath PSM) libraries in DIR])])
        OPAL_CHECK_WITHDIR([psm-libdir], [$with_psm_libdir], [libpsm_infinipath.*])

        dnl The package probe below modifies the global flags; save them so
        dnl they can be restored once the probe is done.
        ompi_check_psm_$1_save_CPPFLAGS="$CPPFLAGS"
        ompi_check_psm_$1_save_LDFLAGS="$LDFLAGS"
        ompi_check_psm_$1_save_LIBS="$LIBS"

        AS_IF([test "$with_psm" != "no"],
              [AS_IF([test -n "$with_psm" && test "$with_psm" != "yes"],
                     [ompi_check_psm_dir="$with_psm"])
               AS_IF([test -n "$with_psm_libdir" && test "$with_psm_libdir" != "yes"],
                     [ompi_check_psm_libdir="$with_psm_libdir"])

               OPAL_CHECK_PACKAGE([ompi_check_psm],
                                  [psm.h],
                                  [psm_infinipath],
                                  [psm_finalize],
                                  [],
                                  [$ompi_check_psm_dir],
                                  [$ompi_check_psm_libdir],
                                  [ompi_check_psm_happy="yes"],
                                  [ompi_check_psm_happy="no"])],
              [ompi_check_psm_happy="no"])

        CPPFLAGS="$ompi_check_psm_$1_save_CPPFLAGS"
        LDFLAGS="$ompi_check_psm_$1_save_LDFLAGS"
        LIBS="$ompi_check_psm_$1_save_LIBS"

        dnl This check gates the PSM MTL, so the warning names the MTL.
        AS_IF([test "$ompi_check_psm_happy" = "yes" && test "$enable_progress_threads" = "yes"],
              [AC_MSG_WARN([PSM driver does not currently support progress threads.  Disabling MTL.])
               ompi_check_psm_happy="no"])

        AS_IF([test "$ompi_check_psm_happy" = "yes"],
              [AC_CHECK_HEADERS(
                   [glob.h],
                   [],
                   [AC_MSG_WARN([glob.h not found.  Can not build component.])
                    ompi_check_psm_happy="no"])])

        OPAL_SUMMARY_ADD([[Transports]],[[Intel TrueScale (PSM)]],[$1],[$ompi_check_psm_happy])
    fi

    AS_IF([test "$ompi_check_psm_happy" = "yes"],
          [$1_LDFLAGS="[$]$1_LDFLAGS $ompi_check_psm_LDFLAGS"
           $1_CPPFLAGS="[$]$1_CPPFLAGS $ompi_check_psm_CPPFLAGS"
           $1_LIBS="[$]$1_LIBS $ompi_check_psm_LIBS"
           $2],
          [AS_IF([test -n "$with_psm" && test "$with_psm" != "no"],
                 [AC_MSG_ERROR([PSM support requested but not found.  Aborting])])
           $3])
])
|
|
||||||
|
|
@ -1,62 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
# University Research and Technology
|
|
||||||
# Corporation. All rights reserved.
|
|
||||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
# of Tennessee Research Foundation. All rights
|
|
||||||
# reserved.
|
|
||||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
# University of Stuttgart. All rights reserved.
|
|
||||||
# Copyright (c) 2004-2006 The Regents of the University of California.
|
|
||||||
# All rights reserved.
|
|
||||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
|
||||||
# Copyright (c) 2017 IBM Corporation. All rights reserved.
|
|
||||||
# $COPYRIGHT$
|
|
||||||
#
|
|
||||||
# Additional copyrights may follow
|
|
||||||
#
|
|
||||||
# $HEADER$
|
|
||||||
#
|
|
||||||
|
|
||||||
# Files shipped in the tarball and installed alongside the component.
EXTRA_DIST         = post_configure.sh
dist_ompidata_DATA = help-mtl-psm.txt

# Preprocessor flags discovered by configure for the PSM headers.
AM_CPPFLAGS = $(mtl_psm_CPPFLAGS)

# Sources shared by the DSO and the static flavour of the component.
mtl_psm_sources = \
        mtl_psm.c \
        mtl_psm.h \
        mtl_psm_cancel.c \
        mtl_psm_component.c \
        mtl_psm_endpoint.c \
        mtl_psm_endpoint.h \
        mtl_psm_probe.c \
        mtl_psm_recv.c \
        mtl_psm_request.h \
        mtl_psm_send.c \
        mtl_psm_types.h

# The library is built in this directory and is called
# mca_<type>_<name>.la when the component is a DSO, or
# libmca_<type>_<name>.la when it is linked statically.

if MCA_BUILD_ompi_mtl_psm_DSO
component_noinst =
component_install = mca_mtl_psm.la
else
component_noinst = libmca_mtl_psm.la
component_install =
endif

# DSO flavour: installed into the MCA component directory.
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_mtl_psm_la_SOURCES = $(mtl_psm_sources)
mca_mtl_psm_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \
        $(mtl_psm_LIBS)
mca_mtl_psm_la_LDFLAGS = -module -avoid-version $(mtl_psm_LDFLAGS)

# Static flavour: convenience library, not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_mtl_psm_la_SOURCES = $(mtl_psm_sources)
libmca_mtl_psm_la_LIBADD = $(mtl_psm_LIBS)
libmca_mtl_psm_la_LDFLAGS = -module -avoid-version $(mtl_psm_LDFLAGS)
|
|
@ -1,49 +0,0 @@
|
|||||||
# -*- shell-script -*-
|
|
||||||
#
|
|
||||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
# University Research and Technology
|
|
||||||
# Corporation. All rights reserved.
|
|
||||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
# of Tennessee Research Foundation. All rights
|
|
||||||
# reserved.
|
|
||||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
# University of Stuttgart. All rights reserved.
|
|
||||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
# All rights reserved.
|
|
||||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
|
||||||
# Copyright (c) 2013 Sandia National Laboratories. All rights reserved.
|
|
||||||
# $COPYRIGHT$
|
|
||||||
#
|
|
||||||
# Additional copyrights may follow
|
|
||||||
#
|
|
||||||
# $HEADER$
|
|
||||||
#
|
|
||||||
|
|
||||||
# MCA_ompi_mtl_psm_POST_CONFIG(will_build)
# ----------------------------------------
# Request the MTL endpoint tag, but only when will_build is "1",
# i.e. when this component is actually going to be built.
AC_DEFUN([MCA_ompi_mtl_psm_POST_CONFIG], [
    AS_IF([test "$1" = "1"],
          [OMPI_REQUIRE_ENDPOINT_TAG([MTL])])
])dnl
|
|
||||||
|
|
||||||
# MCA_ompi_mtl_psm_CONFIG([action-if-can-compile],
#                         [action-if-cant-compile])
# ------------------------------------------------
# Register the component Makefile, probe for PSM through
# OMPI_CHECK_PSM, run the matching action, and export the
# mtl_psm_* build variables.
AC_DEFUN([MCA_ompi_mtl_psm_CONFIG],[
    AC_CONFIG_FILES([ompi/mca/mtl/psm/Makefile])

    OMPI_CHECK_PSM([mtl_psm],
                   [mtl_psm_happy="yes"],
                   [mtl_psm_happy="no"])

    AS_IF([test "$mtl_psm_happy" = "yes"],
          [$1],
          [$2])

    # substitute in the things needed to build psm
    AC_SUBST([mtl_psm_CFLAGS])
    AC_SUBST([mtl_psm_CPPFLAGS])
    AC_SUBST([mtl_psm_LDFLAGS])
    AC_SUBST([mtl_psm_LIBS])
])dnl
|
|
||||||
|
|
@ -1,46 +0,0 @@
|
|||||||
# -*- text -*-
|
|
||||||
#
|
|
||||||
# Copyright (C) 2009. QLogic Corporation. All rights reserved.
|
|
||||||
# $COPYRIGHT$
|
|
||||||
#
|
|
||||||
# Additional copyrights may follow
|
|
||||||
#
|
|
||||||
# $HEADER$
|
|
||||||
#
|
|
||||||
[psm init]
|
|
||||||
Initialization of PSM library failed.
|
|
||||||
|
|
||||||
Error: %s
|
|
||||||
#
|
|
||||||
[debug level]
|
|
||||||
Unable to set PSM debug level.
|
|
||||||
|
|
||||||
Error: %s
|
|
||||||
#
|
|
||||||
[unable to open endpoint]
|
|
||||||
PSM was unable to open an endpoint. Please make sure that the network link is
|
|
||||||
active on the node and the hardware is functioning.
|
|
||||||
|
|
||||||
Error: %s
|
|
||||||
#
|
|
||||||
[no uuid present]
|
|
||||||
Error obtaining unique transport key from ORTE (orte_precondition_transports %s
|
|
||||||
the environment).
|
|
||||||
|
|
||||||
Local host: %s
|
|
||||||
#
|
|
||||||
[error polling network]
|
|
||||||
Error %s occurred in attempting to make network progress (psm_mq_ipeek).
|
|
||||||
#
|
|
||||||
[error posting receive]
|
|
||||||
Unable to post application receive buffer (psm_mq_irecv).
|
|
||||||
|
|
||||||
Error: %s
|
|
||||||
Buffer: %p
|
|
||||||
Length: %llu
|
|
||||||
#
|
|
||||||
[path query mechanism unknown]
|
|
||||||
Unknown path record query mechanism %s. Supported mechanisms are %s.
|
|
||||||
#
|
|
||||||
[message too big]
|
|
||||||
Message size %llu bigger than supported by PSM API. Max = %llu
|
|
@ -1,482 +0,0 @@
|
|||||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2010 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
|
||||||
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2016 Research Organization for Information Science
|
|
||||||
* and Technology (RIST). All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "ompi_config.h"
|
|
||||||
|
|
||||||
#include "opal/mca/pmix/pmix.h"
|
|
||||||
#include "ompi/mca/mtl/mtl.h"
|
|
||||||
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
|
|
||||||
#include "opal/util/show_help.h"
|
|
||||||
#include "ompi/proc/proc.h"
|
|
||||||
|
|
||||||
#include "mtl_psm.h"
|
|
||||||
#include "mtl_psm_types.h"
|
|
||||||
#include "mtl_psm_endpoint.h"
|
|
||||||
#include "mtl_psm_request.h"
|
|
||||||
|
|
||||||
mca_mtl_psm_module_t ompi_mtl_psm = {
|
|
||||||
.super = {
|
|
||||||
/* NTH: PSM supports 16 bit context ids */
|
|
||||||
.mtl_max_contextid = (1UL << 16) - 1,
|
|
||||||
.mtl_max_tag = (1UL << 30), /* must allow negatives */
|
|
||||||
|
|
||||||
.mtl_add_procs = ompi_mtl_psm_add_procs,
|
|
||||||
.mtl_del_procs = ompi_mtl_psm_del_procs,
|
|
||||||
.mtl_finalize = ompi_mtl_psm_finalize,
|
|
||||||
|
|
||||||
.mtl_send = ompi_mtl_psm_send,
|
|
||||||
.mtl_isend = ompi_mtl_psm_isend,
|
|
||||||
|
|
||||||
.mtl_irecv = ompi_mtl_psm_irecv,
|
|
||||||
.mtl_iprobe = ompi_mtl_psm_iprobe,
|
|
||||||
.mtl_imrecv = ompi_mtl_psm_imrecv,
|
|
||||||
.mtl_improbe = ompi_mtl_psm_improbe,
|
|
||||||
|
|
||||||
.mtl_cancel = ompi_mtl_psm_cancel,
|
|
||||||
.mtl_add_comm = ompi_mtl_psm_add_comm,
|
|
||||||
.mtl_del_comm = ompi_mtl_psm_del_comm
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
psm_error_t
|
|
||||||
ompi_mtl_psm_errhandler(psm_ep_t ep, const psm_error_t error,
|
|
||||||
const char *error_string, psm_error_token_t token)
|
|
||||||
{
|
|
||||||
switch (error) {
|
|
||||||
/* We don't want PSM to default to exiting when the following errors occur */
|
|
||||||
case PSM_EP_DEVICE_FAILURE:
|
|
||||||
case PSM_EP_NO_DEVICE:
|
|
||||||
case PSM_EP_NO_PORTS_AVAIL:
|
|
||||||
case PSM_EP_NO_NETWORK:
|
|
||||||
case PSM_EP_INVALID_UUID_KEY:
|
|
||||||
opal_show_help("help-mtl-psm.txt",
|
|
||||||
"unable to open endpoint", true,
|
|
||||||
psm_error_get_string(error));
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* We can't handle any other errors than the ones above */
|
|
||||||
default:
|
|
||||||
opal_output(0, "Open MPI detected an unexpected PSM error in opening "
|
|
||||||
"an endpoint: %s\n", error_string);
|
|
||||||
return psm_error_defer(token);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return error;
|
|
||||||
}
|
|
||||||
|
|
||||||
int ompi_mtl_psm_progress( void );
|
|
||||||
|
|
||||||
int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) {
|
|
||||||
psm_error_t err;
|
|
||||||
psm_ep_t ep; /* endpoint handle */
|
|
||||||
psm_mq_t mq;
|
|
||||||
psm_epid_t epid; /* unique lid+port identifier */
|
|
||||||
psm_uuid_t unique_job_key;
|
|
||||||
struct psm_ep_open_opts ep_opt;
|
|
||||||
unsigned long long *uu = (unsigned long long *) unique_job_key;
|
|
||||||
char *generated_key;
|
|
||||||
char env_string[256];
|
|
||||||
int rc;
|
|
||||||
|
|
||||||
generated_key = getenv(OPAL_MCA_PREFIX"orte_precondition_transports");
|
|
||||||
memset(uu, 0, sizeof(psm_uuid_t));
|
|
||||||
|
|
||||||
if (!generated_key || (strlen(generated_key) != 33) ||
|
|
||||||
sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2)
|
|
||||||
{
|
|
||||||
opal_show_help("help-mtl-psm.txt",
|
|
||||||
"no uuid present", true,
|
|
||||||
generated_key ? "could not be parsed from" :
|
|
||||||
"not present in", ompi_process_info.nodename);
|
|
||||||
return OMPI_ERROR;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Handle our own errors for opening endpoints */
|
|
||||||
psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler);
|
|
||||||
|
|
||||||
/* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM can allocate hardware
|
|
||||||
* contexts correctly.
|
|
||||||
*/
|
|
||||||
snprintf(env_string, sizeof(env_string), "%d", local_rank);
|
|
||||||
setenv("MPI_LOCALRANKID", env_string, 0);
|
|
||||||
snprintf(env_string, sizeof(env_string), "%d", num_local_procs);
|
|
||||||
setenv("MPI_LOCALNRANKS", env_string, 0);
|
|
||||||
|
|
||||||
/* Setup the endpoint options. */
|
|
||||||
bzero((void*) &ep_opt, sizeof(ep_opt));
|
|
||||||
ep_opt.timeout = ompi_mtl_psm.connect_timeout * 1e9;
|
|
||||||
ep_opt.unit = ompi_mtl_psm.ib_unit;
|
|
||||||
ep_opt.affinity = PSM_EP_OPEN_AFFINITY_SKIP; /* do not let PSM set affinity */
|
|
||||||
ep_opt.shm_mbytes = -1; /* Choose PSM defaults */
|
|
||||||
ep_opt.sendbufs_num = -1; /* Choose PSM defaults */
|
|
||||||
|
|
||||||
#if PSM_VERNO >= 0x0101
|
|
||||||
ep_opt.network_pkey = ompi_mtl_psm.ib_pkey;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if PSM_VERNO >= 0x0107
|
|
||||||
ep_opt.port = ompi_mtl_psm.ib_port;
|
|
||||||
ep_opt.outsl = ompi_mtl_psm.ib_service_level;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if PSM_VERNO >= 0x010d
|
|
||||||
ep_opt.service_id = ompi_mtl_psm.ib_service_id;
|
|
||||||
ep_opt.path_res_type = ompi_mtl_psm.path_res_type;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Open PSM endpoint */
|
|
||||||
err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid);
|
|
||||||
if (err) {
|
|
||||||
opal_show_help("help-mtl-psm.txt",
|
|
||||||
"unable to open endpoint", true,
|
|
||||||
psm_error_get_string(err));
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Future errors are handled by the default error handler */
|
|
||||||
psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT);
|
|
||||||
|
|
||||||
err = psm_mq_init(ep,
|
|
||||||
0xffff000000000000ULL,
|
|
||||||
NULL,
|
|
||||||
0,
|
|
||||||
&mq);
|
|
||||||
if (err) {
|
|
||||||
opal_show_help("help-mtl-psm.txt",
|
|
||||||
"psm init", true,
|
|
||||||
psm_error_get_string(err));
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
ompi_mtl_psm.ep = ep;
|
|
||||||
ompi_mtl_psm.epid = epid;
|
|
||||||
ompi_mtl_psm.mq = mq;
|
|
||||||
|
|
||||||
OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
|
|
||||||
&mca_mtl_psm_component.super.mtl_version,
|
|
||||||
&ompi_mtl_psm.epid,
|
|
||||||
sizeof(psm_epid_t));
|
|
||||||
|
|
||||||
if (OMPI_SUCCESS != rc) {
|
|
||||||
opal_output(0, "Open MPI couldn't send PSM epid to head node process");
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* register the psm progress function */
|
|
||||||
opal_progress_register(ompi_mtl_psm_progress);
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
ompi_mtl_psm_finalize(struct mca_mtl_base_module_t* mtl) {
|
|
||||||
psm_error_t err;
|
|
||||||
|
|
||||||
opal_progress_unregister(ompi_mtl_psm_progress);
|
|
||||||
|
|
||||||
/* free resources */
|
|
||||||
err = psm_mq_finalize(ompi_mtl_psm.mq);
|
|
||||||
if (err) {
|
|
||||||
opal_output(0, "Error in psm_mq_finalize (error %s)\n",
|
|
||||||
psm_error_get_string(err));
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = psm_ep_close(ompi_mtl_psm.ep, PSM_EP_CLOSE_GRACEFUL, 1*1e9);
|
|
||||||
if (err) {
|
|
||||||
opal_output(0, "Error in psm_ep_close (error %s)\n",
|
|
||||||
psm_error_get_string(err));
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = psm_finalize();
|
|
||||||
if (err) {
|
|
||||||
opal_output(0, "Error in psm_finalize (error %s)\n",
|
|
||||||
psm_error_get_string(err));
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
const char *
|
|
||||||
ompi_mtl_psm_connect_error_msg(psm_error_t err)
|
|
||||||
{
|
|
||||||
switch (err) { /* See if we expect the error */
|
|
||||||
case PSM_EPID_UNREACHABLE:
|
|
||||||
case PSM_EPID_INVALID_NODE:
|
|
||||||
case PSM_EPID_INVALID_MTU:
|
|
||||||
case PSM_EPID_INVALID_UUID_KEY:
|
|
||||||
case PSM_EPID_INVALID_VERSION:
|
|
||||||
case PSM_EPID_INVALID_CONNECT:
|
|
||||||
return psm_error_get_string(err);
|
|
||||||
break;
|
|
||||||
case PSM_EPID_UNKNOWN:
|
|
||||||
return "Connect status could not be determined "
|
|
||||||
"because of other errors";
|
|
||||||
default:
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Fallback min/max helpers, defined only when the platform has not
 * already provided them.  Each argument may be evaluated twice, so do
 * not pass expressions with side effects.
 */
#if !defined(min)
#define min(a,b) (((a) < (b)) ? (a) : (b))
#endif

#if !defined(max)
#define max(a,b) (((a) > (b)) ? (a) : (b))
#endif
|
|
||||||
|
|
||||||
int
|
|
||||||
ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
|
|
||||||
size_t nprocs,
|
|
||||||
struct ompi_proc_t** procs)
|
|
||||||
{
|
|
||||||
int i,j;
|
|
||||||
int rc;
|
|
||||||
psm_epid_t *epids_in = NULL;
|
|
||||||
int *mask_in = NULL;
|
|
||||||
psm_epid_t *epid;
|
|
||||||
psm_epaddr_t *epaddrs_out = NULL;
|
|
||||||
psm_error_t *errs_out = NULL, err;
|
|
||||||
size_t size;
|
|
||||||
int proc_errors[PSM_ERROR_LAST] = { 0 };
|
|
||||||
int timeout_in_secs;
|
|
||||||
|
|
||||||
assert(mtl == &ompi_mtl_psm.super);
|
|
||||||
rc = OMPI_ERR_OUT_OF_RESOURCE;
|
|
||||||
|
|
||||||
errs_out = (psm_error_t *) malloc(nprocs * sizeof(psm_error_t));
|
|
||||||
if (errs_out == NULL) {
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
epids_in = (psm_epid_t *) malloc(nprocs * sizeof(psm_epid_t));
|
|
||||||
if (epids_in == NULL) {
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
mask_in = (int *) malloc(nprocs * sizeof(int));
|
|
||||||
if (mask_in == NULL) {
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
epaddrs_out = (psm_epaddr_t *) malloc(nprocs * sizeof(psm_epaddr_t));
|
|
||||||
if (epaddrs_out == NULL) {
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
rc = OMPI_SUCCESS;
|
|
||||||
|
|
||||||
/* Get the epids for all the processes from modex */
|
|
||||||
for (i = 0; i < (int) nprocs; i++) {
|
|
||||||
if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) {
|
|
||||||
/* Already connected: don't connect again */
|
|
||||||
mask_in[i] = 0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
OPAL_MODEX_RECV(rc, &mca_mtl_psm_component.super.mtl_version,
|
|
||||||
&procs[i]->super.proc_name, (void**)&epid, &size);
|
|
||||||
if (rc != OMPI_SUCCESS || size != sizeof(psm_epid_t)) {
|
|
||||||
rc = OMPI_ERROR;
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
epids_in[i] = *epid;
|
|
||||||
mask_in[i] = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
timeout_in_secs = max(ompi_mtl_psm.connect_timeout, 0.5 * nprocs);
|
|
||||||
|
|
||||||
psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_NOP);
|
|
||||||
|
|
||||||
err = psm_ep_connect(ompi_mtl_psm.ep,
|
|
||||||
nprocs,
|
|
||||||
epids_in,
|
|
||||||
mask_in,
|
|
||||||
errs_out,
|
|
||||||
epaddrs_out,
|
|
||||||
timeout_in_secs * 1e9);
|
|
||||||
if (err) {
|
|
||||||
char *errstr = (char *) ompi_mtl_psm_connect_error_msg(err);
|
|
||||||
if (errstr == NULL) {
|
|
||||||
opal_output(0, "PSM returned unhandled/unknown connect error: %s\n",
|
|
||||||
psm_error_get_string(err));
|
|
||||||
}
|
|
||||||
for (i = 0; i < (int) nprocs; i++) {
|
|
||||||
if (0 == mask_in[i]) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
psm_error_t thiserr = errs_out[i];
|
|
||||||
errstr = (char *) ompi_mtl_psm_connect_error_msg(thiserr);
|
|
||||||
if (proc_errors[thiserr] == 0) {
|
|
||||||
proc_errors[thiserr] = 1;
|
|
||||||
opal_output(0, "PSM EP connect error (%s):",
|
|
||||||
errstr ? errstr : "unknown connect error");
|
|
||||||
for (j = 0; j < (int) nprocs; j++) {
|
|
||||||
if (errs_out[j] == thiserr) {
|
|
||||||
opal_output(0, " %s", (NULL == procs[j]->super.proc_hostname) ?
|
|
||||||
"unknown" : procs[j]->super.proc_hostname);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
opal_output(0, "\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
rc = OMPI_ERROR;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/* Default error handling is enabled, errors will not be returned to
|
|
||||||
* user. PSM prints the error and the offending endpoint's hostname
|
|
||||||
* and exits with -1 */
|
|
||||||
psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT);
|
|
||||||
|
|
||||||
/* Fill in endpoint data */
|
|
||||||
for (i = 0; i < (int) nprocs; i++) {
|
|
||||||
if (0 == mask_in[i]) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
mca_mtl_psm_endpoint_t *endpoint =
|
|
||||||
(mca_mtl_psm_endpoint_t *) OBJ_NEW(mca_mtl_psm_endpoint_t);
|
|
||||||
endpoint->peer_epid = epids_in[i];
|
|
||||||
endpoint->peer_addr = epaddrs_out[i];
|
|
||||||
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
|
|
||||||
}
|
|
||||||
|
|
||||||
rc = OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
bail:
|
|
||||||
if (epids_in != NULL) {
|
|
||||||
free(epids_in);
|
|
||||||
}
|
|
||||||
if (mask_in != NULL) {
|
|
||||||
free(mask_in);
|
|
||||||
}
|
|
||||||
if (errs_out != NULL) {
|
|
||||||
free(errs_out);
|
|
||||||
}
|
|
||||||
if (epaddrs_out != NULL) {
|
|
||||||
free(epaddrs_out);
|
|
||||||
}
|
|
||||||
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
ompi_mtl_psm_del_procs(struct mca_mtl_base_module_t *mtl,
|
|
||||||
size_t nprocs,
|
|
||||||
struct ompi_proc_t** procs)
|
|
||||||
{
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
ompi_mtl_psm_add_comm(struct mca_mtl_base_module_t *mtl,
|
|
||||||
struct ompi_communicator_t *comm)
|
|
||||||
{
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
ompi_mtl_psm_del_comm(struct mca_mtl_base_module_t *mtl,
|
|
||||||
struct ompi_communicator_t *comm)
|
|
||||||
{
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int ompi_mtl_psm_progress( void ) {
|
|
||||||
psm_error_t err;
|
|
||||||
mca_mtl_psm_request_t* mtl_psm_request;
|
|
||||||
psm_mq_status_t psm_status;
|
|
||||||
psm_mq_req_t req;
|
|
||||||
int completed = 1;
|
|
||||||
|
|
||||||
do {
|
|
||||||
err = psm_mq_ipeek(ompi_mtl_psm.mq, &req, NULL);
|
|
||||||
if (err == PSM_MQ_INCOMPLETE) {
|
|
||||||
return completed;
|
|
||||||
} else if (err != PSM_OK) {
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
completed++;
|
|
||||||
|
|
||||||
err = psm_mq_test(&req, &psm_status);
|
|
||||||
if (err != PSM_OK) {
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
mtl_psm_request = (mca_mtl_psm_request_t*) psm_status.context;
|
|
||||||
|
|
||||||
if (mtl_psm_request->type == OMPI_MTL_PSM_IRECV) {
|
|
||||||
ompi_mtl_datatype_unpack(mtl_psm_request->convertor,
|
|
||||||
mtl_psm_request->buf,
|
|
||||||
psm_status.msg_length);
|
|
||||||
|
|
||||||
mtl_psm_request->super.ompi_req->req_status.MPI_SOURCE =
|
|
||||||
PSM_GET_MQRANK(psm_status.msg_tag);
|
|
||||||
mtl_psm_request->super.ompi_req->req_status.MPI_TAG =
|
|
||||||
PSM_GET_MQUTAG(psm_status.msg_tag);
|
|
||||||
mtl_psm_request->super.ompi_req->req_status._ucount =
|
|
||||||
psm_status.nbytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(mtl_psm_request->type == OMPI_MTL_PSM_ISEND) {
|
|
||||||
if (mtl_psm_request->free_after) {
|
|
||||||
free(mtl_psm_request->buf);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (psm_status.error_code) {
|
|
||||||
case PSM_OK:
|
|
||||||
mtl_psm_request->super.ompi_req->req_status.MPI_ERROR =
|
|
||||||
OMPI_SUCCESS;
|
|
||||||
break;
|
|
||||||
case PSM_MQ_TRUNCATION:
|
|
||||||
mtl_psm_request->super.ompi_req->req_status.MPI_ERROR =
|
|
||||||
MPI_ERR_TRUNCATE;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
mtl_psm_request->super.ompi_req->req_status.MPI_ERROR =
|
|
||||||
MPI_ERR_INTERN;
|
|
||||||
}
|
|
||||||
|
|
||||||
mtl_psm_request->super.completion_callback(&mtl_psm_request->super);
|
|
||||||
|
|
||||||
}
|
|
||||||
while (1);
|
|
||||||
|
|
||||||
error:
|
|
||||||
opal_show_help("help-mtl-psm.txt",
|
|
||||||
"error polling network", true,
|
|
||||||
psm_error_get_string(err));
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
@ -1,110 +0,0 @@
|
|||||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
|
||||||
* reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MTL_PSM_H_HAS_BEEN_INCLUDED
|
|
||||||
#define MTL_PSM_H_HAS_BEEN_INCLUDED
|
|
||||||
|
|
||||||
#include "ompi/mca/pml/pml.h"
|
|
||||||
#include "ompi/mca/mtl/mtl.h"
|
|
||||||
#include "ompi/mca/mtl/base/base.h"
|
|
||||||
#include "ompi/proc/proc.h"
|
|
||||||
#include "opal/datatype/opal_convertor.h"
|
|
||||||
#include <psm.h>
|
|
||||||
#include <psm_mq.h>
|
|
||||||
|
|
||||||
BEGIN_C_DECLS
|
|
||||||
|
|
||||||
|
|
||||||
/* MTL interface functions */
|
|
||||||
extern int ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t* mtl,
|
|
||||||
size_t nprocs,
|
|
||||||
struct ompi_proc_t** procs);
|
|
||||||
|
|
||||||
extern int ompi_mtl_psm_del_procs(struct mca_mtl_base_module_t* mtl,
|
|
||||||
size_t nprocs,
|
|
||||||
struct ompi_proc_t** procs);
|
|
||||||
|
|
||||||
int
|
|
||||||
ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl,
|
|
||||||
struct ompi_communicator_t* comm,
|
|
||||||
int dest,
|
|
||||||
int tag,
|
|
||||||
struct opal_convertor_t *convertor,
|
|
||||||
mca_pml_base_send_mode_t mode);
|
|
||||||
|
|
||||||
extern int ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl,
|
|
||||||
struct ompi_communicator_t* comm,
|
|
||||||
int dest,
|
|
||||||
int tag,
|
|
||||||
struct opal_convertor_t *convertor,
|
|
||||||
mca_pml_base_send_mode_t mode,
|
|
||||||
bool blocking,
|
|
||||||
mca_mtl_request_t * mtl_request);
|
|
||||||
|
|
||||||
extern int ompi_mtl_psm_irecv(struct mca_mtl_base_module_t* mtl,
|
|
||||||
struct ompi_communicator_t *comm,
|
|
||||||
int src,
|
|
||||||
int tag,
|
|
||||||
struct opal_convertor_t *convertor,
|
|
||||||
struct mca_mtl_request_t *mtl_request);
|
|
||||||
|
|
||||||
|
|
||||||
extern int ompi_mtl_psm_iprobe(struct mca_mtl_base_module_t* mtl,
|
|
||||||
struct ompi_communicator_t *comm,
|
|
||||||
int src,
|
|
||||||
int tag,
|
|
||||||
int *flag,
|
|
||||||
struct ompi_status_public_t *status);
|
|
||||||
|
|
||||||
extern int ompi_mtl_psm_imrecv(struct mca_mtl_base_module_t* mtl,
|
|
||||||
struct opal_convertor_t *convertor,
|
|
||||||
struct ompi_message_t **message,
|
|
||||||
struct mca_mtl_request_t *mtl_request);
|
|
||||||
|
|
||||||
extern int ompi_mtl_psm_improbe(struct mca_mtl_base_module_t *mtl,
|
|
||||||
struct ompi_communicator_t *comm,
|
|
||||||
int src,
|
|
||||||
int tag,
|
|
||||||
int *matched,
|
|
||||||
struct ompi_message_t **message,
|
|
||||||
struct ompi_status_public_t *status);
|
|
||||||
|
|
||||||
extern int ompi_mtl_psm_cancel(struct mca_mtl_base_module_t* mtl,
|
|
||||||
struct mca_mtl_request_t *mtl_request,
|
|
||||||
int flag);
|
|
||||||
|
|
||||||
extern int ompi_mtl_psm_add_comm(struct mca_mtl_base_module_t *mtl,
|
|
||||||
struct ompi_communicator_t *comm);
|
|
||||||
|
|
||||||
extern int ompi_mtl_psm_del_comm(struct mca_mtl_base_module_t *mtl,
|
|
||||||
struct ompi_communicator_t *comm);
|
|
||||||
|
|
||||||
extern int ompi_mtl_psm_finalize(struct mca_mtl_base_module_t* mtl);
|
|
||||||
|
|
||||||
int ompi_mtl_psm_module_init(int local_rank, int num_local_procs);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
END_C_DECLS
|
|
||||||
|
|
||||||
#endif /* MTL_PSM_H_HAS_BEEN_INCLUDED */
|
|
||||||
|
|
@ -1,54 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "ompi_config.h"
|
|
||||||
#include "mtl_psm.h"
|
|
||||||
#include "mtl_psm_request.h"
|
|
||||||
|
|
||||||
int ompi_mtl_psm_cancel(struct mca_mtl_base_module_t* mtl,
|
|
||||||
struct mca_mtl_request_t *mtl_request,
|
|
||||||
int flag) {
|
|
||||||
|
|
||||||
psm_error_t err;
|
|
||||||
psm_mq_status_t status;
|
|
||||||
|
|
||||||
mca_mtl_psm_request_t *mtl_psm_request =
|
|
||||||
(mca_mtl_psm_request_t*) mtl_request;
|
|
||||||
|
|
||||||
/* PSM does not support canceling sends */
|
|
||||||
if(OMPI_MTL_PSM_ISEND == mtl_psm_request->type) {
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = psm_mq_cancel(&mtl_psm_request->psm_request);
|
|
||||||
if(PSM_OK == err) {
|
|
||||||
err = psm_mq_test(&mtl_psm_request->psm_request, &status);
|
|
||||||
if(PSM_OK == err) {
|
|
||||||
mtl_request->ompi_req->req_status._cancelled = true;
|
|
||||||
mtl_psm_request->super.completion_callback(&mtl_psm_request->super);
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
} else {
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
} else if(PSM_MQ_INCOMPLETE == err) {
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
@ -1,365 +0,0 @@
|
|||||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2006-2010 QLogic Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2012-2015 Los Alamos National Security, LLC.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2014 Intel Corporation. All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "ompi_config.h"
|
|
||||||
|
|
||||||
#include "opal/mca/event/event.h"
|
|
||||||
#include "opal/util/output.h"
|
|
||||||
#include "opal/util/show_help.h"
|
|
||||||
#include "ompi/proc/proc.h"
|
|
||||||
|
|
||||||
#include "mtl_psm.h"
|
|
||||||
#include "mtl_psm_types.h"
|
|
||||||
#include "mtl_psm_request.h"
|
|
||||||
|
|
||||||
#include "psm.h"
|
|
||||||
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <glob.h>
|
|
||||||
|
|
||||||
static int param_priority;
|
|
||||||
|
|
||||||
static int ompi_mtl_psm_component_open(void);
|
|
||||||
static int ompi_mtl_psm_component_close(void);
|
|
||||||
static int ompi_mtl_psm_component_query(mca_base_module_t **module, int *priority);
|
|
||||||
static int ompi_mtl_psm_component_register(void);
|
|
||||||
|
|
||||||
static mca_mtl_base_module_t* ompi_mtl_psm_component_init( bool enable_progress_threads,
|
|
||||||
bool enable_mpi_threads );
|
|
||||||
|
|
||||||
mca_mtl_psm_component_t mca_mtl_psm_component = {
|
|
||||||
|
|
||||||
{
|
|
||||||
/* First, the mca_base_component_t struct containing meta
|
|
||||||
* information about the component itself */
|
|
||||||
|
|
||||||
.mtl_version = {
|
|
||||||
MCA_MTL_BASE_VERSION_2_0_0,
|
|
||||||
|
|
||||||
.mca_component_name = "psm",
|
|
||||||
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
|
|
||||||
OMPI_RELEASE_VERSION),
|
|
||||||
.mca_open_component = ompi_mtl_psm_component_open,
|
|
||||||
.mca_close_component = ompi_mtl_psm_component_close,
|
|
||||||
.mca_query_component = ompi_mtl_psm_component_query,
|
|
||||||
.mca_register_component_params = ompi_mtl_psm_component_register,
|
|
||||||
},
|
|
||||||
.mtl_data = {
|
|
||||||
/* The component is not checkpoint ready */
|
|
||||||
MCA_BASE_METADATA_PARAM_NONE
|
|
||||||
},
|
|
||||||
|
|
||||||
.mtl_init = ompi_mtl_psm_component_init,
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
#if PSM_VERNO >= 0x010d
/* Value/name pairs accepted by the "path_query" MCA variable; the list is
 * terminated by a {0, NULL} entry. */
static mca_base_var_enum_value_t path_query_values[] = {
    { PSM_PATH_RES_NONE, "none" },
    { PSM_PATH_RES_OPP,  "opp"  },
    { 0,                 NULL   }
};
#endif
|
|
||||||
|
|
||||||
static int
|
|
||||||
ompi_mtl_psm_component_register(void)
|
|
||||||
{
|
|
||||||
#if PSM_VERNO >= 0x010d
|
|
||||||
mca_base_var_enum_t *new_enum;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/* set priority high enough to beat ob1's default */
|
|
||||||
param_priority = 30;
|
|
||||||
(void) mca_base_component_var_register (&mca_mtl_psm_component.super.mtl_version,
|
|
||||||
"priority", "Priority of the PSM MTL component",
|
|
||||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
||||||
OPAL_INFO_LVL_9,
|
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
|
||||||
¶m_priority);
|
|
||||||
|
|
||||||
ompi_mtl_psm.connect_timeout = 180;
|
|
||||||
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
|
||||||
"connect_timeout",
|
|
||||||
"PSM connection timeout value in seconds",
|
|
||||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
||||||
OPAL_INFO_LVL_9,
|
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
|
||||||
&ompi_mtl_psm.connect_timeout);
|
|
||||||
|
|
||||||
ompi_mtl_psm.debug_level = 1;
|
|
||||||
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
|
||||||
"debug", "PSM debug level",
|
|
||||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
||||||
OPAL_INFO_LVL_9,
|
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
|
||||||
&ompi_mtl_psm.debug_level);
|
|
||||||
|
|
||||||
ompi_mtl_psm.ib_unit = -1;
|
|
||||||
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
|
||||||
"ib_unit", "Truescale unit to use",
|
|
||||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
||||||
OPAL_INFO_LVL_9,
|
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
|
||||||
&ompi_mtl_psm.ib_unit);
|
|
||||||
|
|
||||||
ompi_mtl_psm.ib_port = 0;
|
|
||||||
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
|
||||||
"ib_port", "Truescale port on unit to use",
|
|
||||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
||||||
OPAL_INFO_LVL_9,
|
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
|
||||||
&ompi_mtl_psm.ib_port);
|
|
||||||
|
|
||||||
ompi_mtl_psm.ib_service_level = 0;
|
|
||||||
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
|
||||||
"ib_service_level", "Infiniband service level"
|
|
||||||
"(0 <= SL <= 15)", MCA_BASE_VAR_TYPE_INT,
|
|
||||||
NULL, 0, 0, OPAL_INFO_LVL_9,
|
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
|
||||||
&ompi_mtl_psm.ib_service_level);
|
|
||||||
|
|
||||||
ompi_mtl_psm.ib_pkey = 0x7fffUL;
|
|
||||||
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
|
||||||
"ib_pkey", "Infiniband partition key",
|
|
||||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
||||||
OPAL_INFO_LVL_9,
|
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
|
||||||
&ompi_mtl_psm.ib_pkey);
|
|
||||||
|
|
||||||
#if PSM_VERNO >= 0x010d
|
|
||||||
ompi_mtl_psm.ib_service_id = 0x1000117500000000ull;
|
|
||||||
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
|
||||||
"ib_service_id",
|
|
||||||
"Infiniband service ID to use for application (default is 0)",
|
|
||||||
MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, NULL, 0, 0,
|
|
||||||
OPAL_INFO_LVL_9,
|
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
|
||||||
&ompi_mtl_psm.ib_service_id);
|
|
||||||
|
|
||||||
ompi_mtl_psm.path_res_type = PSM_PATH_RES_NONE;
|
|
||||||
mca_base_var_enum_create("mtl_psm_path_query", path_query_values, &new_enum);
|
|
||||||
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
|
|
||||||
"path_query",
|
|
||||||
"Path record query mechanisms",
|
|
||||||
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
|
|
||||||
OPAL_INFO_LVL_9,
|
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
|
||||||
&ompi_mtl_psm.path_res_type);
|
|
||||||
OBJ_RELEASE(new_enum);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
|
||||||
ompi_mtl_psm_component_open(void)
|
|
||||||
{
|
|
||||||
struct stat st;
|
|
||||||
|
|
||||||
if (ompi_mtl_psm.ib_service_level < 0) {
|
|
||||||
ompi_mtl_psm.ib_service_level = 0;
|
|
||||||
} else if (ompi_mtl_psm.ib_service_level > 15) {
|
|
||||||
ompi_mtl_psm.ib_service_level = 15;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Component available only if Truescale hardware is present */
|
|
||||||
if (0 != stat("/dev/ipath", &st)) {
|
|
||||||
return OPAL_ERR_NOT_AVAILABLE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Component available only if at least one qib port is ACTIVE */
|
|
||||||
bool foundOnlineQibPort = false;
|
|
||||||
size_t i;
|
|
||||||
char portState[128];
|
|
||||||
FILE *devFile;
|
|
||||||
glob_t globbuf;
|
|
||||||
globbuf.gl_offs = 0;
|
|
||||||
if (glob("/sys/class/infiniband/qib*/ports/*/state",
|
|
||||||
GLOB_DOOFFS, NULL, &globbuf) != 0) {
|
|
||||||
return OPAL_ERR_NOT_AVAILABLE;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i=0;i < globbuf.gl_pathc; i++) {
|
|
||||||
devFile = fopen(globbuf.gl_pathv[i], "r");
|
|
||||||
fgets(portState, sizeof(portState), devFile);
|
|
||||||
fclose(devFile);
|
|
||||||
|
|
||||||
if (strstr(portState, "ACTIVE") != NULL) {
|
|
||||||
/* Found at least one ACTIVE port */
|
|
||||||
foundOnlineQibPort = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
globfree(&globbuf);
|
|
||||||
|
|
||||||
if (!foundOnlineQibPort) {
|
|
||||||
return OPAL_ERR_NOT_AVAILABLE;
|
|
||||||
}
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
|
||||||
ompi_mtl_psm_component_query(mca_base_module_t **module, int *priority)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* if we get here it means that PSM is available so give high priority
|
|
||||||
*/
|
|
||||||
|
|
||||||
*priority = param_priority;
|
|
||||||
*module = (mca_base_module_t *)&ompi_mtl_psm.super;
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static int
|
|
||||||
ompi_mtl_psm_component_close(void)
|
|
||||||
{
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
|
||||||
get_num_total_procs(int *out_ntp)
|
|
||||||
{
|
|
||||||
*out_ntp = (int)ompi_process_info.num_procs;
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
|
||||||
get_num_local_procs(int *out_nlp)
|
|
||||||
{
|
|
||||||
/* num_local_peers does not include us in
|
|
||||||
* its calculation, so adjust for that */
|
|
||||||
*out_nlp = (int)(1 + ompi_process_info.num_local_peers);
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
|
||||||
get_local_rank(int *out_rank)
|
|
||||||
{
|
|
||||||
ompi_node_rank_t my_node_rank;
|
|
||||||
|
|
||||||
*out_rank = 0;
|
|
||||||
|
|
||||||
if (OMPI_NODE_RANK_INVALID == (my_node_rank =
|
|
||||||
ompi_process_info.my_node_rank)) {
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
*out_rank = (int)my_node_rank;
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
static mca_mtl_base_module_t *
|
|
||||||
ompi_mtl_psm_component_init(bool enable_progress_threads,
|
|
||||||
bool enable_mpi_threads)
|
|
||||||
{
|
|
||||||
psm_error_t err;
|
|
||||||
int verno_major = PSM_VERNO_MAJOR;
|
|
||||||
int verno_minor = PSM_VERNO_MINOR;
|
|
||||||
int local_rank = -1, num_local_procs = 0;
|
|
||||||
int num_total_procs = 0;
|
|
||||||
|
|
||||||
/* Compute the total number of processes on this host and our local rank
|
|
||||||
* on that node. We need to provide PSM with these values so it can
|
|
||||||
* allocate hardware contexts appropriately across processes.
|
|
||||||
*/
|
|
||||||
if (OMPI_SUCCESS != get_num_local_procs(&num_local_procs)) {
|
|
||||||
opal_output(0, "Cannot determine number of local processes. "
|
|
||||||
"Cannot continue.\n");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
if (OMPI_SUCCESS != get_local_rank(&local_rank)) {
|
|
||||||
opal_output(0, "Cannot determine local rank. Cannot continue.\n");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
if (OMPI_SUCCESS != get_num_total_procs(&num_total_procs)) {
|
|
||||||
opal_output(0, "Cannot determine total number of processes. "
|
|
||||||
"Cannot continue.\n");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#if PSM_VERNO >= 0x010c
|
|
||||||
/* Set infinipath debug level */
|
|
||||||
err = psm_setopt(PSM_COMPONENT_CORE, 0, PSM_CORE_OPT_DEBUG,
|
|
||||||
(const void*) &ompi_mtl_psm.debug_level,
|
|
||||||
sizeof(unsigned));
|
|
||||||
if (err) {
|
|
||||||
/* Non fatal error. Can continue */
|
|
||||||
opal_show_help("help-mtl-psm.txt",
|
|
||||||
"psm init", false,
|
|
||||||
psm_error_get_string(err));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (getenv("PSM_DEVICES") == NULL) {
|
|
||||||
/* Only allow for shm and ipath devices in 2.0 and earlier releases
|
|
||||||
* (unless the user overrides the setting).
|
|
||||||
*/
|
|
||||||
if (PSM_VERNO >= 0x0104) {
|
|
||||||
if (num_local_procs == num_total_procs) {
|
|
||||||
setenv("PSM_DEVICES", "self,shm", 0);
|
|
||||||
} else {
|
|
||||||
setenv("PSM_DEVICES", "self,shm,ipath", 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (num_local_procs == num_total_procs) {
|
|
||||||
setenv("PSM_DEVICES", "shm", 0);
|
|
||||||
} else {
|
|
||||||
setenv("PSM_DEVICES", "shm,ipath", 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
err = psm_init(&verno_major, &verno_minor);
|
|
||||||
if (err) {
|
|
||||||
opal_show_help("help-mtl-psm.txt",
|
|
||||||
"psm init", true,
|
|
||||||
psm_error_get_string(err));
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Complete PSM initialization */
|
|
||||||
ompi_mtl_psm_module_init(local_rank, num_local_procs);
|
|
||||||
|
|
||||||
ompi_mtl_psm.super.mtl_request_size =
|
|
||||||
sizeof(mca_mtl_psm_request_t) -
|
|
||||||
sizeof(struct mca_mtl_request_t);
|
|
||||||
|
|
||||||
/* don't register the err handler until we know we will be active */
|
|
||||||
err = psm_error_register_handler(NULL /* no ep */,
|
|
||||||
PSM_ERRHANDLER_NOP);
|
|
||||||
if (err) {
|
|
||||||
opal_output(0, "Error in psm_error_register_handler (error %s)\n",
|
|
||||||
psm_error_get_string(err));
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return &ompi_mtl_psm.super;
|
|
||||||
}
|
|
||||||
|
|
@ -1,54 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#include "ompi_config.h"
|
|
||||||
#include <sys/time.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include "ompi/types.h"
|
|
||||||
#include "mtl_psm.h"
|
|
||||||
#include "mtl_psm_types.h"
|
|
||||||
#include "mtl_psm_endpoint.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Initialize state of the endpoint instance.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
static void
mca_mtl_psm_endpoint_construct(mca_mtl_psm_endpoint_t *endpoint)
{
    /* A fresh endpoint is not yet attached to any MTL module. */
    endpoint->mtl_psm_module = NULL;
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Destroy a endpoint
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
static void
mca_mtl_psm_endpoint_destruct(mca_mtl_psm_endpoint_t *endpoint)
{
    /* Intentionally empty: there is nothing to tear down here. */
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Class instance for mca_mtl_psm_endpoint_t: parent class opal_list_item_t,
 * with the constructor/destructor defined above. */
OBJ_CLASS_INSTANCE(mca_mtl_psm_endpoint_t,
                   opal_list_item_t,
                   mca_mtl_psm_endpoint_construct,
                   mca_mtl_psm_endpoint_destruct);
|
|
||||||
|
|
@ -1,70 +0,0 @@
|
|||||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
|
||||||
* reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MCA_MTL_PSM_ENDPOINT_H
|
|
||||||
#define MCA_MTL_PSM_ENDPOINT_H
|
|
||||||
|
|
||||||
#include "opal/class/opal_list.h"
|
|
||||||
#include "opal/mca/event/event.h"
|
|
||||||
#include "ompi/mca/mtl/mtl.h"
|
|
||||||
#include "mtl_psm.h"
|
|
||||||
|
|
||||||
#include "psm.h"
|
|
||||||
|
|
||||||
BEGIN_C_DECLS
|
|
||||||
|
|
||||||
OBJ_CLASS_DECLARATION(mca_mtl_psm_endpoint_t);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An abstraction that represents a connection to a endpoint process.
|
|
||||||
* An instance of mca_mtl_psm_endpoint_t is associated w/ each process
|
|
||||||
* and MTL pair at startup. However, connections to the endpoint
|
|
||||||
* are established dynamically on an as-needed basis:
|
|
||||||
*/
|
|
||||||
|
|
||||||
struct mca_mtl_psm_endpoint_t {
|
|
||||||
opal_list_item_t super;
|
|
||||||
|
|
||||||
struct mca_mtl_psm_module_t* mtl_psm_module;
|
|
||||||
/**< MTL instance that created this connection */
|
|
||||||
|
|
||||||
psm_epid_t peer_epid;
|
|
||||||
/**< The unique epid for the opened port */
|
|
||||||
|
|
||||||
psm_epaddr_t peer_addr;
|
|
||||||
/**< The connected endpoint handle*/
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef struct mca_mtl_psm_endpoint_t mca_mtl_psm_endpoint_t;
|
|
||||||
OBJ_CLASS_DECLARATION(mca_mtl_psm_endpoint);
|
|
||||||
|
|
||||||
/* Return the MTL endpoint stored on ompi_proc.  If none is stored yet,
 * ompi_mtl_psm_add_procs() is called for that single proc first; its result
 * is not checked, so whatever the MTL slot holds afterwards is returned. */
static inline mca_mtl_psm_endpoint_t *ompi_mtl_psm_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc)
{
    const bool endpoint_missing =
        (NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]);

    if (OPAL_UNLIKELY(endpoint_missing)) {
        ompi_mtl_psm_add_procs (mtl, 1, &ompi_proc);
    }

    return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
}
|
|
||||||
|
|
||||||
END_C_DECLS
|
|
||||||
#endif
|
|
@ -1,83 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2010 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
|
|
||||||
* reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "ompi_config.h"
|
|
||||||
#include "mtl_psm.h"
|
|
||||||
#include "mtl_psm_types.h"
|
|
||||||
#include "psm.h"
|
|
||||||
#include "ompi/communicator/communicator.h"
|
|
||||||
|
|
||||||
|
|
||||||
int ompi_mtl_psm_iprobe(struct mca_mtl_base_module_t* mtl,
|
|
||||||
struct ompi_communicator_t *comm,
|
|
||||||
int src,
|
|
||||||
int tag,
|
|
||||||
int *flag,
|
|
||||||
struct ompi_status_public_t *status)
|
|
||||||
{
|
|
||||||
uint64_t mqtag, tagsel;
|
|
||||||
psm_mq_status_t mqstat;
|
|
||||||
psm_error_t err;
|
|
||||||
|
|
||||||
PSM_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel);
|
|
||||||
|
|
||||||
err = psm_mq_iprobe(ompi_mtl_psm.mq, mqtag, tagsel, &mqstat);
|
|
||||||
if (err == PSM_OK) {
|
|
||||||
*flag = 1;
|
|
||||||
if(MPI_STATUS_IGNORE != status) {
|
|
||||||
status->MPI_SOURCE = PSM_GET_MQRANK(mqstat.msg_tag);
|
|
||||||
status->MPI_TAG = PSM_GET_MQUTAG(mqstat.msg_tag);
|
|
||||||
status->_ucount = mqstat.nbytes;
|
|
||||||
|
|
||||||
switch (mqstat.error_code) {
|
|
||||||
case PSM_OK:
|
|
||||||
status->MPI_ERROR = OMPI_SUCCESS;
|
|
||||||
break;
|
|
||||||
case PSM_MQ_TRUNCATION:
|
|
||||||
status->MPI_ERROR = MPI_ERR_TRUNCATE;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
status->MPI_ERROR = MPI_ERR_INTERN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
else if (err == PSM_MQ_INCOMPLETE) {
|
|
||||||
*flag = 0;
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
ompi_mtl_psm_improbe(struct mca_mtl_base_module_t *mtl,
|
|
||||||
struct ompi_communicator_t *comm,
|
|
||||||
int src,
|
|
||||||
int tag,
|
|
||||||
int *matched,
|
|
||||||
struct ompi_message_t **message,
|
|
||||||
struct ompi_status_public_t *status)
|
|
||||||
{
|
|
||||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
|
||||||
}
|
|
@ -1,97 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#include "ompi_config.h"

#include <stdlib.h>

#include "ompi/communicator/communicator.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
#include "opal/util/show_help.h"

#include "mtl_psm.h"
#include "mtl_psm_types.h"
#include "mtl_psm_request.h"
|
|
||||||
|
|
||||||
int
|
|
||||||
ompi_mtl_psm_irecv(struct mca_mtl_base_module_t* mtl,
|
|
||||||
struct ompi_communicator_t *comm,
|
|
||||||
int src,
|
|
||||||
int tag,
|
|
||||||
struct opal_convertor_t *convertor,
|
|
||||||
struct mca_mtl_request_t *mtl_request)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
psm_error_t err;
|
|
||||||
mca_mtl_psm_request_t * mtl_psm_request = (mca_mtl_psm_request_t*) mtl_request;
|
|
||||||
uint64_t mqtag;
|
|
||||||
uint64_t tagsel;
|
|
||||||
size_t length;
|
|
||||||
|
|
||||||
ret = ompi_mtl_datatype_recv_buf(convertor,
|
|
||||||
&mtl_psm_request->buf,
|
|
||||||
&length,
|
|
||||||
&mtl_psm_request->free_after);
|
|
||||||
|
|
||||||
if (OMPI_SUCCESS != ret) return ret;
|
|
||||||
|
|
||||||
if (length >= 1ULL << sizeof(uint32_t) * 8) {
|
|
||||||
opal_show_help("help-mtl-psm.txt",
|
|
||||||
"message too big", false,
|
|
||||||
length, 1ULL << sizeof(uint32_t) * 8);
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
mtl_psm_request->length = length;
|
|
||||||
mtl_psm_request->convertor = convertor;
|
|
||||||
mtl_psm_request->type = OMPI_MTL_PSM_IRECV;
|
|
||||||
|
|
||||||
PSM_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel);
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
printf("recv bits: 0x%016llx 0x%016llx\n", mqtag, tagsel);
|
|
||||||
#endif
|
|
||||||
err = psm_mq_irecv(ompi_mtl_psm.mq,
|
|
||||||
mqtag,
|
|
||||||
tagsel,
|
|
||||||
0,
|
|
||||||
mtl_psm_request->buf,
|
|
||||||
length,
|
|
||||||
mtl_psm_request,
|
|
||||||
&mtl_psm_request->psm_request);
|
|
||||||
|
|
||||||
if (err) {
|
|
||||||
opal_show_help("help-mtl-psm.txt",
|
|
||||||
"error posting receive", true,
|
|
||||||
psm_error_get_string(err),
|
|
||||||
mtl_psm_request->buf, length);
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
ompi_mtl_psm_imrecv(struct mca_mtl_base_module_t* mtl,
|
|
||||||
struct opal_convertor_t *convertor,
|
|
||||||
struct ompi_message_t **message,
|
|
||||||
struct mca_mtl_request_t *mtl_request)
|
|
||||||
{
|
|
||||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
|
||||||
}
|
|
@ -1,43 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef OMPI_MTL_PSM_REQUEST_H
|
|
||||||
#define OMPI_MTL_PSM_REQUEST_H
|
|
||||||
|
|
||||||
#include "opal/datatype/opal_convertor.h"
|
|
||||||
|
|
||||||
|
|
||||||
/* Direction of a PSM MTL request. */
typedef enum {
    OMPI_MTL_PSM_ISEND,   /* request was started by a send */
    OMPI_MTL_PSM_IRECV    /* request was started by a receive */
} mca_mtl_psm_request_type_t;
|
|
||||||
|
|
||||||
struct mca_mtl_psm_request_t {
|
|
||||||
struct mca_mtl_request_t super;
|
|
||||||
mca_mtl_psm_request_type_t type;
|
|
||||||
psm_mq_req_t psm_request;
|
|
||||||
/* psm_segment_t psm_segment[1]; */
|
|
||||||
void *buf;
|
|
||||||
size_t length;
|
|
||||||
struct opal_convertor_t *convertor;
|
|
||||||
bool free_after;
|
|
||||||
};
|
|
||||||
typedef struct mca_mtl_psm_request_t mca_mtl_psm_request_t;
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,146 +0,0 @@
|
|||||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
|
||||||
* reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "ompi_config.h"
|
|
||||||
#include "ompi/mca/pml/pml.h"
|
|
||||||
#include "ompi/communicator/communicator.h"
|
|
||||||
#include "opal/datatype/opal_convertor.h"
|
|
||||||
#include "opal/util/show_help.h"
|
|
||||||
|
|
||||||
#include "mtl_psm.h"
|
|
||||||
#include "mtl_psm_types.h"
|
|
||||||
#include "mtl_psm_request.h"
|
|
||||||
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
|
|
||||||
|
|
||||||
int
|
|
||||||
ompi_mtl_psm_send(struct mca_mtl_base_module_t* mtl,
|
|
||||||
struct ompi_communicator_t* comm,
|
|
||||||
int dest,
|
|
||||||
int tag,
|
|
||||||
struct opal_convertor_t *convertor,
|
|
||||||
mca_pml_base_send_mode_t mode)
|
|
||||||
{
|
|
||||||
psm_error_t err;
|
|
||||||
mca_mtl_psm_request_t mtl_psm_request;
|
|
||||||
uint64_t mqtag;
|
|
||||||
uint32_t flags = 0;
|
|
||||||
int ret;
|
|
||||||
size_t length;
|
|
||||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
|
|
||||||
mca_mtl_psm_endpoint_t* psm_endpoint = ompi_mtl_psm_get_endpoint (mtl, ompi_proc);
|
|
||||||
|
|
||||||
assert(mtl == &ompi_mtl_psm.super);
|
|
||||||
|
|
||||||
mqtag = PSM_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag);
|
|
||||||
|
|
||||||
ret = ompi_mtl_datatype_pack(convertor,
|
|
||||||
&mtl_psm_request.buf,
|
|
||||||
&length,
|
|
||||||
&mtl_psm_request.free_after);
|
|
||||||
|
|
||||||
if (OMPI_SUCCESS != ret) return ret;
|
|
||||||
|
|
||||||
if (length >= 1ULL << sizeof(uint32_t) * 8) {
|
|
||||||
opal_show_help("help-mtl-psm.txt",
|
|
||||||
"message too big", false,
|
|
||||||
length, 1ULL << sizeof(uint32_t) * 8);
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
mtl_psm_request.length = length;
|
|
||||||
mtl_psm_request.convertor = convertor;
|
|
||||||
mtl_psm_request.type = OMPI_MTL_PSM_ISEND;
|
|
||||||
|
|
||||||
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS)
|
|
||||||
flags |= PSM_MQ_FLAG_SENDSYNC;
|
|
||||||
|
|
||||||
err = psm_mq_send(ompi_mtl_psm.mq,
|
|
||||||
psm_endpoint->peer_addr,
|
|
||||||
flags,
|
|
||||||
mqtag,
|
|
||||||
mtl_psm_request.buf,
|
|
||||||
length);
|
|
||||||
|
|
||||||
if (mtl_psm_request.free_after) {
|
|
||||||
free(mtl_psm_request.buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
return err == PSM_OK ? OMPI_SUCCESS : OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
ompi_mtl_psm_isend(struct mca_mtl_base_module_t* mtl,
|
|
||||||
struct ompi_communicator_t* comm,
|
|
||||||
int dest,
|
|
||||||
int tag,
|
|
||||||
struct opal_convertor_t *convertor,
|
|
||||||
mca_pml_base_send_mode_t mode,
|
|
||||||
bool blocking,
|
|
||||||
mca_mtl_request_t * mtl_request)
|
|
||||||
{
|
|
||||||
psm_error_t psm_error;
|
|
||||||
uint64_t mqtag;
|
|
||||||
uint32_t flags = 0;
|
|
||||||
int ret;
|
|
||||||
mca_mtl_psm_request_t * mtl_psm_request = (mca_mtl_psm_request_t*) mtl_request;
|
|
||||||
size_t length;
|
|
||||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
|
|
||||||
mca_mtl_psm_endpoint_t* psm_endpoint = ompi_mtl_psm_get_endpoint (mtl, ompi_proc);
|
|
||||||
|
|
||||||
assert(mtl == &ompi_mtl_psm.super);
|
|
||||||
|
|
||||||
mqtag = PSM_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag);
|
|
||||||
|
|
||||||
|
|
||||||
ret = ompi_mtl_datatype_pack(convertor,
|
|
||||||
&mtl_psm_request->buf,
|
|
||||||
&length,
|
|
||||||
&mtl_psm_request->free_after);
|
|
||||||
|
|
||||||
|
|
||||||
if (OMPI_SUCCESS != ret) return ret;
|
|
||||||
|
|
||||||
if (length >= 1ULL << sizeof(uint32_t) * 8) {
|
|
||||||
opal_show_help("help-mtl-psm.txt",
|
|
||||||
"message too big", false,
|
|
||||||
length, 1ULL << sizeof(uint32_t) * 8);
|
|
||||||
return OMPI_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
mtl_psm_request->length= length;
|
|
||||||
mtl_psm_request->convertor = convertor;
|
|
||||||
mtl_psm_request->type = OMPI_MTL_PSM_ISEND;
|
|
||||||
|
|
||||||
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS)
|
|
||||||
flags |= PSM_MQ_FLAG_SENDSYNC;
|
|
||||||
|
|
||||||
psm_error = psm_mq_isend(ompi_mtl_psm.mq,
|
|
||||||
psm_endpoint->peer_addr,
|
|
||||||
flags,
|
|
||||||
mqtag,
|
|
||||||
mtl_psm_request->buf,
|
|
||||||
length,
|
|
||||||
mtl_psm_request,
|
|
||||||
&mtl_psm_request->psm_request);
|
|
||||||
|
|
||||||
return psm_error == PSM_OK ? OMPI_SUCCESS : OMPI_ERROR;
|
|
||||||
}
|
|
@ -1,97 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
|
||||||
* University Research and Technology
|
|
||||||
* Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
|
||||||
* of Tennessee Research Foundation. All rights
|
|
||||||
* reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
|
||||||
* Copyright (c) 2011 Los Alamos National Security, LLC.
|
|
||||||
* All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MTL_PSM_TYPES_H_HAS_BEEN_INCLUDED
|
|
||||||
#define MTL_PSM_TYPES_H_HAS_BEEN_INCLUDED
|
|
||||||
|
|
||||||
#include "ompi_config.h"
|
|
||||||
#include "mtl_psm.h"
|
|
||||||
|
|
||||||
#include "ompi/mca/mtl/mtl.h"
|
|
||||||
#include "ompi/mca/mtl/base/base.h"
|
|
||||||
#include "mtl_psm_endpoint.h"
|
|
||||||
|
|
||||||
#include "psm.h"
|
|
||||||
|
|
||||||
|
|
||||||
BEGIN_C_DECLS
|
|
||||||
|
|
||||||
/**
|
|
||||||
* MTL Module Interface
|
|
||||||
*/
|
|
||||||
struct mca_mtl_psm_module_t {
|
|
||||||
mca_mtl_base_module_t super; /**< base MTL interface */
|
|
||||||
|
|
||||||
int32_t connect_timeout;
|
|
||||||
|
|
||||||
int32_t debug_level;
|
|
||||||
int32_t ib_unit;
|
|
||||||
int32_t ib_port;
|
|
||||||
int32_t ib_service_level;
|
|
||||||
uint64_t ib_pkey;
|
|
||||||
|
|
||||||
#if PSM_VERNO >= 0x010d
|
|
||||||
unsigned long long ib_service_id;
|
|
||||||
/* use int instead of psm_path_res_t so we can register this with
|
|
||||||
the MCA variable system */
|
|
||||||
int path_res_type;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
psm_ep_t ep;
|
|
||||||
psm_mq_t mq;
|
|
||||||
psm_epid_t epid;
|
|
||||||
psm_epaddr_t epaddr;
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef struct mca_mtl_psm_module_t mca_mtl_psm_module_t;
|
|
||||||
|
|
||||||
extern mca_mtl_psm_module_t ompi_mtl_psm;
|
|
||||||
|
|
||||||
struct mca_mtl_psm_component_t {
|
|
||||||
mca_mtl_base_component_2_0_0_t super; /**< base MTL component */
|
|
||||||
};
|
|
||||||
typedef struct mca_mtl_psm_component_t mca_mtl_psm_component_t;
|
|
||||||
|
|
||||||
OMPI_DECLSPEC extern mca_mtl_psm_component_t mca_mtl_psm_component;
|
|
||||||
|
|
||||||
/*
 * Build the 64-bit PSM match tag:
 *   bits 63..48  low 16 bits of the context id
 *   bits 47..32  low 16 bits of the rank
 *   bits 31..0   low 32 bits of the user tag
 */
#define PSM_MAKE_MQTAG(ctxt, rank, utag)              \
    ( (((ctxt) & 0xffffULL) << 48) |                  \
      (((rank) & 0xffffULL) << 32) |                  \
      ((utag) & 0xffffffffULL) )
|
|
||||||
|
|
||||||
/* Extract the 16-bit rank field (bits 47..32) of a 64-bit match tag as an int. */
#define PSM_GET_MQRANK(tag_u64)    ((int) (((tag_u64) >> 32) & 0xffff))
|
|
||||||
/* Extract the 32-bit user-tag field (bits 31..0) of a 64-bit match tag as an int. */
#define PSM_GET_MQUTAG(tag_u64)    ((int) ((tag_u64) & 0xffffffffULL))
|
|
||||||
|
|
||||||
/*
 * Compute the match tag and tag-selection mask for a receive.
 *
 * tagsel starts with every bit set and tag is built with PSM_MAKE_MQTAG.
 * For MPI_ANY_TAG the low 31 bits are dropped from the mask and the whole
 * 32-bit user-tag field of tag is zeroed; bit 31 stays in the mask, so only
 * incoming tags whose bit 31 is clear can match.  For MPI_ANY_SOURCE the
 * 16-bit rank field (bits 47..32) is dropped from the mask.
 *
 * Note: user_rank and user_tag are evaluated more than once.
 */
#define PSM_MAKE_TAGSEL(user_rank, user_tag, user_ctxt, tag, tagsel)      \
    do {                                                                  \
        (tagsel) = 0xffffffffffffffffULL;                                 \
        (tag) = PSM_MAKE_MQTAG((user_ctxt), (user_rank), (user_tag));     \
        if ((user_tag) == MPI_ANY_TAG) {                                  \
            (tagsel) &= ~0x7fffffffULL;                                   \
            (tag) &= ~0xffffffffULL;                                      \
        }                                                                 \
        if ((user_rank) == MPI_ANY_SOURCE) {                              \
            (tagsel) &= ~0xffff00000000ULL;                               \
        }                                                                 \
    } while (0)
|
|
||||||
|
|
||||||
END_C_DECLS
|
|
||||||
|
|
||||||
#endif /* MTL_PSM_TYPES_H_HAS_BEEN_INCLUDED */
|
|
||||||
|
|
@ -1,7 +0,0 @@
|
|||||||
#
|
|
||||||
# owner/status file
|
|
||||||
# owner: institution that is responsible for this package
|
|
||||||
# status: e.g. active, maintenance, unmaintained
|
|
||||||
#
|
|
||||||
owner: INTEL
|
|
||||||
status: active
|
|
@ -1 +0,0 @@
|
|||||||
DIRECT_CALL_HEADER="ompi/mca/mtl/psm/mtl_psm.h"
|
|
Загрузка…
Ссылка в новой задаче
Block a user