Коммит
c67afadcfc
3
VERSION
3
VERSION
@ -107,3 +107,6 @@ libmca_common_verbs_so_version=0:0:0
|
||||
|
||||
# OPAL layer
|
||||
libmca_opal_common_pmi_so_version=0:0:0
|
||||
|
||||
# ORTE layer
|
||||
libmca_common_alps_so_version=0:0:0
|
||||
|
@ -17,112 +17,148 @@
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# ORTE_CHECK_ALPS_CLE4([action-if-found], [action-if-not-found])
# --------------------------------------------------------
# Locate the ALPS headers and libraries on a Cray CLE 4 system.
#
# Reads the shell variables with_alps and with_alps_libdir.  When both
# libalps.a and alps/apInfo.h are found, sets CRAY_ALPSLLI_CFLAGS,
# CRAY_ALPSLLI_LIBS and CRAY_ALPSLLI_STATIC_LIBS and runs action-if-found;
# otherwise runs action-if-not-found.
AC_DEFUN([ORTE_CHECK_ALPS_CLE4],[

#
# if we've gotten here, it's because we are building on a CLE 4 system
#
    # these names must match the ones tested at the end of the macro
    orte_check_alps_libdir_cle4_happy="no"
    orte_check_alps_dir_cle4_happy="no"

    AC_MSG_CHECKING([Checking for ALPS components on a CLE 4 system with alps $with_alps])

    AC_ARG_WITH([alps-libdir],
                [AC_HELP_STRING([--with-alps-libdir=DIR],
                [Location of alps libraries (alpslli, alpsutil) (default: /usr/lib/alps (/opt/cray/xe-sysroot/default/usr on eslogin nodes))])])

#
# check to see if Open MPI is being built on a CLE 4 eslogin node
#
    AS_IF([test -f /etc/opt/cray/release/ESLrelease],
          [default_alps_dir="/opt/cray/xe-sysroot/default/usr"],
          [default_alps_dir="/usr"])

#
# pick the library directory: an explicit libdir option wins, then the
# lib64 or lib directory below an explicit alps directory, then the
# system default
#
    AS_IF([test -n "$with_alps_libdir"],
          [orte_check_alps_libdir="$with_alps_libdir"],
          [AS_IF([test "$with_alps" != "yes" -a "$with_alps" != "auto"],
                 [AS_IF([test -d "$with_alps/lib64"],
                        [orte_check_alps_libdir="$with_alps/lib64"],
                        [orte_check_alps_libdir="$with_alps/lib"])],
                 [orte_check_alps_libdir="$default_alps_dir/lib/alps"])])

    AS_IF([test "$with_alps" = "yes" -o "$with_alps" = "auto"],
          [orte_check_alps_dir=$default_alps_dir],
          [orte_check_alps_dir=$with_alps])

    AC_MSG_CHECKING([if $orte_check_alps_libdir/libalps.a is present])
    AS_IF([test -f "$orte_check_alps_libdir/libalps.a"],
          [orte_check_alps_libdir_cle4_happy="yes"
           AC_MSG_RESULT([yes])],
          [orte_check_alps_libdir_cle4_happy="no"
           AC_MSG_RESULT([no])])

    AC_MSG_CHECKING([if $orte_check_alps_dir/include/alps/apInfo.h is present])
    AS_IF([test -f "$orte_check_alps_dir/include/alps/apInfo.h"],
          [orte_check_alps_dir_cle4_happy="yes"
           AC_MSG_RESULT([yes])],
          [orte_check_alps_dir_cle4_happy="no"
           AC_MSG_RESULT([no])])

    AS_IF([test "$orte_check_alps_libdir_cle4_happy" = "yes" -a "$orte_check_alps_dir_cle4_happy" = "yes"],
          [CRAY_ALPSLLI_CFLAGS="-I$orte_check_alps_dir/include"
           CRAY_ALPSLLI_LIBS="-L$orte_check_alps_libdir -lalpslli -lalpsutil"
           CRAY_ALPSLLI_STATIC_LIBS="-L$orte_check_alps_libdir -lalpslli -lalpsutil"
           $1],
          [$2])
])
|
||||
|
||||
|
||||
|
||||
|
||||
# ORTE_CHECK_ALPS(prefix, [action-if-found], [action-if-not-found])
|
||||
# --------------------------------------------------------
|
||||
AC_DEFUN([ORTE_CHECK_ALPS],[
|
||||
if test -z "$orte_check_alps_happy"; then
|
||||
# require that we check for pmi support request first so
|
||||
# we can get the static library ordering correct
|
||||
AC_REQUIRE([OPAL_CHECK_CRAY_PMI])
|
||||
|
||||
AC_ARG_WITH([alps],
|
||||
[AC_HELP_STRING([--with-alps(=DIR|yes|no)],
|
||||
[Build with ALPS scheduler component, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries (default: no)])])
|
||||
OPAL_CHECK_WITHDIR([alps], [$with_alps], [.])
|
||||
[Build with ALPS scheduler component, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries (default: auto)])],[],with_alps=auto)
|
||||
|
||||
AC_ARG_WITH([alps-libdir],
|
||||
[AC_HELP_STRING([--with-alps-libdir=DIR],
|
||||
[Location of alps libraries (alpslli, alpsutil) (default: /usr/lib/alps)])])
|
||||
|
||||
AC_ARG_WITH([wlm_detect],
|
||||
[AC_HELP_STRING([--with-wlm_detect(=DIR)],
|
||||
[Location of wlm_detect library needed by PMI on CLE 5 systems (default: /opt/cray/wlm_detect/default)])])
|
||||
|
||||
# save the CPPFLAGS so we can check for alps/apInfo.h without adding $with_alps/include to the global path
|
||||
orte_check_alps_$1_save_CPPFLAGS="$CPPFLAGS"
|
||||
|
||||
#
|
||||
# check to see where alps is installed, it wandered to a new location starting with CLE 5.0
|
||||
#
|
||||
|
||||
if test -f "/usr/lib/alps/libalps.a" ; then
|
||||
using_cle5_install="no"
|
||||
if test -f /etc/opt/cray/release/clerelease; then
|
||||
cle_level=`awk -F. '{print [$]1}' /etc/opt/cray/release/clerelease`
|
||||
else
|
||||
using_cle5_install="yes"
|
||||
if test -z "$with_wlm_detect" ; then
|
||||
with_wlm_detect="/opt/cray/wlm_detect/default"
|
||||
fi
|
||||
|
||||
# libpmi requires libugni for static linking on CLE 5. WTH!
|
||||
OPAL_CHECK_UGNI($1,[orte_check_alps_happy=yes],[orte_check_alps_happy=no])
|
||||
cle_level="unknown"
|
||||
fi
|
||||
|
||||
if test "$with_alps" = "no" -o -z "$with_alps" ; then
|
||||
orte_check_alps_happy="no"
|
||||
else
|
||||
# Only need to do these tests once - this macro is invoked
|
||||
# from multiple different components' configure.m4 scripts
|
||||
AC_MSG_CHECKING([for ALPS support cle level $cle_level])
|
||||
AS_IF([test "$cle_level" = "4" -a "$with_alps" != "no"],
|
||||
[ORTE_CHECK_ALPS_CLE4([orte_check_cray_alps_happy="yes"],
|
||||
[orte_check_cray_alps_happy="no"])],
|
||||
[AS_IF([test "$with_alps" = "no"],
|
||||
[AC_MSG_RESULT([no])
|
||||
$3],
|
||||
[AS_IF([test "$with_alps" = "auto" -o "$with_alps" = "yes"],
|
||||
[PKG_CHECK_MODULES_STATIC([CRAY_ALPSLLI], [cray-alpslli],
|
||||
[orte_check_cray_alps_happy="yes"],
|
||||
[orte_check_cray_alps_happy="no"]
|
||||
[AS_IF([test "$with_alps" = "yes"],
|
||||
[AC_MSG_WARN([ALPS support requested but pkg-config failed.])
|
||||
AC_MSG_WARN([Need to explicitly indicate ALPS directory])
|
||||
AC_MSG_WARN([on the configure line using --with-alps option.])
|
||||
AC_MSG_ERROR([Aborting])],[])]
|
||||
)
|
||||
PKG_CHECK_MODULES_STATIC([CRAY_ALPSUTIL], [cray-alpsutil],
|
||||
[orte_check_cray_alps_happy="yes"],
|
||||
[orte_check_cray_alps_happy="no"]
|
||||
[AS_IF([test "$with_alps" = "yes"],
|
||||
[AC_MSG_WARN([ALPS support requested but pkg-config failed.])
|
||||
AC_MSG_WARN([Need to explicitly indicate ALPS directory])
|
||||
AC_MSG_WARN([on the configure line using --with-alps option.])
|
||||
AC_MSG_ERROR([Aborting])],[])]
|
||||
)
|
||||
|
||||
orte_check_alps_happy="yes"
|
||||
orte_check_alps_libdir="$with_alps_libdir"
|
||||
|
||||
if test -z "$orte_check_alps_libdir" ; then
|
||||
if test "$with_alps" != "yes" ; then
|
||||
AS_IF([test -d "$with_alps/lib64"],
|
||||
[orte_check_alps_libdir="$with_alps/lib64"],
|
||||
[orte_check_alps_libdir="$with_alps/lib"])
|
||||
else
|
||||
if test "$using_cle5_install" = "yes"; then
|
||||
orte_check_alps_libdir="/opt/cray/alps/default/lib64"
|
||||
else
|
||||
orte_check_alps_libdir="/usr/lib/alps"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
PKG_CHECK_MODULES_STATIC([CRAY_ALPS], [cray-alps],
|
||||
[orte_check_cray_alps_happy="yes"],
|
||||
[orte_check_cray_alps_happy="no"]
|
||||
[AS_IF([test "$with_alps" = "yes"],
|
||||
[AC_MSG_WARN([ALPS support requested but pkg-config failed.])
|
||||
AC_MSG_WARN([Need to explicitly indicate ALPS directory])
|
||||
AC_MSG_WARN([on the configure line using --with-alps option.])
|
||||
AC_MSG_ERROR([Aborting])],[])]
|
||||
)
|
||||
|
||||
if test "$with_alps" = "yes" ; then
|
||||
AS_IF([test "$using_cle5_install" = "yes"],
|
||||
[orte_check_alps_dir="/opt/cray/alps/default"],
|
||||
[orte_check_alps_dir="/usr"])
|
||||
else
|
||||
orte_check_alps_dir="$with_alps"
|
||||
fi
|
||||
],
|
||||
[AC_MSG_WARN([See ./configure --help for how to control Open MPI])
|
||||
AC_MSG_WARN([configuration for ALPS on CLE 5 and higher systems])
|
||||
AC_MSG_ERROR([Aborting])])
|
||||
])
|
||||
])
|
||||
|
||||
if test -z "$orte_check_alps_pmi_happy"; then
|
||||
# if pmi support is requested, then OPAL_CHECK_PMI
|
||||
# will have added the -lpmi flag to LIBS. We then need
|
||||
# to add a couple of alps libs to support static
|
||||
# builds
|
||||
if test "$opal_enable_pmi" = 1 ; then
|
||||
AC_MSG_CHECKING([for alps libraries in "$orte_check_alps_libdir"])
|
||||
AC_MSG_RESULT([orte_check_cray_alps_happy = $orte_check_cray_alps_happy])
|
||||
|
||||
AS_IF([test "$orte_check_cray_alps_happy" = "yes" -a "$enable_static" = "yes"],
|
||||
[CRAY_ALPSLLI_LIBS = $CRAY_ALPSLLI_STATIC_LIBS
|
||||
CRAY_ALPSUTIL_LIBS = $CRAY_ALPSUTIL_STATIC_LIBS],
|
||||
[])
|
||||
|
||||
AC_MSG_RESULT([CRAY_ALPSLLI_STATIC_LIBS - $CRAY_ALPSLLI_STATIC_LIBS])
|
||||
AC_MSG_RESULT([CRAY_ALPSLLI_LIBS - $CRAY_ALPSLLI_LIBS])
|
||||
AC_MSG_RESULT([CRAY_ALPSLLI_CFLAGS - $CRAY_ALPSLLI_CFLAGS])
|
||||
|
||||
AC_MSG_RESULT([CRAY_ALPSUTIL_STATIC_LIBS - $CRAY_ALPSUTIL_STATIC_LIBS])
|
||||
AC_MSG_RESULT([CRAY_ALPSUTIL_LIBS - $CRAY_ALPSUTIL_LIBS])
|
||||
AC_MSG_RESULT([CRAY_ALPSUTIL_CFLAGS - $CRAY_ALPSUTIL_CFLAGS])
|
||||
|
||||
AC_MSG_RESULT([CRAY_ALPS_CFLAGS - $CRAY_ALPS_CFLAGS])
|
||||
|
||||
AS_IF([test "$orte_check_cray_alps_happy" = "yes"],
|
||||
[$1_LDFLAGS="$CRAY_ALPSLLI_LIBS $CRAY_ALPSUTIL_LIBS"
|
||||
$1_CPPFLAGS="$CRAY_ALPSLLI_CFLAGS $CRAY_ALPSUTIL_CFLAGS $CRAY_ALPS_CFLAGS"
|
||||
$1_LIBS="$CRAY_ALPSLLI_LIBS $CRAY_ALPSUTIL_LIBS"],
|
||||
[])
|
||||
|
||||
# libalpslli and libalpsutil are needed by libpmi to compile statically
|
||||
AS_IF([test -f "$orte_check_alps_libdir/libalpslli.a" -a -f "$orte_check_alps_libdir/libalpsutil.a"],
|
||||
[AC_MSG_RESULT([found])
|
||||
orte_check_alps_pmi_happy=yes],
|
||||
[AC_MSG_WARN([PMI support for Alps requested but not found])
|
||||
AC_MSG_ERROR([Cannot continue])])
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Set LIBS, CPPFLAGS, and LDFLAGS here so they always get set
|
||||
if test "$orte_check_alps_happy" = "yes" -a "$opal_enable_pmi" = 1 ; then
|
||||
$1_LIBS="-lalpslli -lalpsutil"
|
||||
fi
|
||||
|
||||
$1_CPPFLAGS="-I$orte_check_alps_dir/include"
|
||||
$1_LDFLAGS="-L$orte_check_alps_libdir"
|
||||
|
||||
# Add CLE 5 library dependencies
|
||||
if test "using_cle5_install" = "yes" ; then
|
||||
$1_LIBS="$$1_LIBS -lwlm_detect"
|
||||
$1_LDFLAGS="$$1_LDFLAGS -L$with_wlm_detect"
|
||||
fi
|
||||
|
||||
AS_IF([test "$orte_check_alps_happy" = "yes"],
|
||||
[$2],
|
||||
[$3])
|
||||
AS_IF([test "$orte_check_cray_alps_happy" = "yes"],
|
||||
[$2], [$3])
|
||||
])
|
||||
|
@ -157,6 +157,7 @@ AC_SUBST(libmca_common_ofacm_so_version)
|
||||
AC_SUBST(libmca_common_sm_so_version)
|
||||
AC_SUBST(libmca_common_ugni_so_version)
|
||||
AC_SUBST(libmca_common_verbs_so_version)
|
||||
AC_SUBST(libmca_common_alps_so_version)
|
||||
|
||||
#
|
||||
# Get the versions of the autotools that were used to bootstrap us
|
||||
|
@ -105,10 +105,6 @@ static int pmix_cray_component_query(mca_base_module_t **module, int *priority)
|
||||
|
||||
static int pmix_cray_component_close(void)
|
||||
{
|
||||
int ret = OPAL_SUCCESS;
|
||||
|
||||
ret = opal_pmix_cray_module.finalize();
|
||||
|
||||
return ret;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
25
orte/mca/common/Makefile.am
Обычный файл
25
orte/mca/common/Makefile.am
Обычный файл
@ -0,0 +1,25 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Note that this file must exist, even though it is empty (there is no
|
||||
# "base" directory for the common framework). autogen.pl and
|
||||
# opal_mca.m4 assume that every framework has a top-level Makefile.am.
|
||||
# We *could* adjust the framework glue code to exclude "common" from
|
||||
# this requirement, but it's just a lot easier to have an empty
|
||||
# Makefile.am here.
|
66
orte/mca/common/alps/Makefile.am
Обычный файл
66
orte/mca/common/alps/Makefile.am
Обычный файл
@ -0,0 +1,66 @@
|
||||
#
|
||||
# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
|
||||
# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
|
||||
# Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Preprocessor flags produced by the ALPS configure check.
AM_CPPFLAGS = $(common_alps_CPPFLAGS)

headers = \
        common_alps.h

sources = \
        common_alps.c

# The library is built either as an installable libtool library (DSO
# build) or as a non-installed convenience library (static build).
lib_LTLIBRARIES =
noinst_LTLIBRARIES =
comp_inst = lib@ORTE_LIB_PREFIX@mca_common_alps.la
comp_noinst = lib@ORTE_LIB_PREFIX@mca_common_alps_noinst.la

if MCA_BUILD_orte_common_alps_DSO
lib_LTLIBRARIES += $(comp_inst)
else
noinst_LTLIBRARIES += $(comp_noinst)
endif

lib@ORTE_LIB_PREFIX@mca_common_alps_la_SOURCES = $(headers) $(sources)
lib@ORTE_LIB_PREFIX@mca_common_alps_la_CPPFLAGS = $(common_alps_CPPFLAGS)
lib@ORTE_LIB_PREFIX@mca_common_alps_la_LDFLAGS = \
        -version-info $(libmca_common_alps_so_version) \
        $(common_alps_LDFLAGS)
lib@ORTE_LIB_PREFIX@mca_common_alps_la_LIBADD = $(common_alps_LIBS)
lib@ORTE_LIB_PREFIX@mca_common_alps_noinst_la_SOURCES = $(headers) $(sources)

# Conditionally install the header files.
#
# The install location must be a directory, not the header file itself.
# $(subdir) is this directory's path relative to the top of the tree, so
# the header lands where it is included from
# (orte/mca/common/alps/common_alps.h).
# NOTE(review): assumes $(opalincludedir) is the include root shared by
# the whole tree - confirm against the top-level configure.

if WANT_INSTALL_HEADERS
opaldir = $(opalincludedir)/$(subdir)
opal_HEADERS = $(headers)
else
opaldir = $(includedir)
endif

# These two rules will sym link the "noinst" libtool library filename
# to the installable libtool library filename in the case where we are
# compiling this component statically.
V=0
OMPI_V_LN_SCOMP = $(ompi__v_LN_SCOMP_$V)
ompi__v_LN_SCOMP_ = $(ompi__v_LN_SCOMP_$AM_DEFAULT_VERBOSITY)
ompi__v_LN_SCOMP_0 = @echo " LN_S " `basename $(comp_inst)`;

all-local:
	$(OMPI_V_LN_SCOMP) if test -z "$(lib_LTLIBRARIES)"; then \
	  rm -f "$(comp_inst)"; \
	  $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \
	fi

clean-local:
	if test -z "$(lib_LTLIBRARIES)"; then \
	  rm -f "$(comp_inst)"; \
	fi
|
73
orte/mca/common/alps/common_alps.c
Обычный файл
73
orte/mca/common/alps/common_alps.c
Обычный файл
@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* These symbols are in a file by themselves to provide nice linker
|
||||
* semantics. Since linkers generally pull in symbols by object
|
||||
* files, keeping these symbols as the only symbols in this file
|
||||
* prevents utility programs such as "ompi_info" from having to import
|
||||
* entire components just to query their version and parameters.
|
||||
*/
|
||||
|
||||
#include "opal/types.h"
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
#include "orte/mca/common/alps/common_alps.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
|
||||
/*
|
||||
* determine whether or not calling process is in a Cray PAGG container
|
||||
*/
|
||||
|
||||
int orte_common_alps_proc_in_pagg(bool *flag)
|
||||
{
|
||||
int rc = ORTE_SUCCESS;
|
||||
const char proc_job_file[]="/proc/job";
|
||||
FILE *fd = NULL, *fd_task_is_app = NULL;
|
||||
char task_is_app_fname[PATH_MAX];
|
||||
|
||||
if (flag == NULL) {
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
fd = fopen(proc_job_file, "r");
|
||||
if (fd == NULL) {
|
||||
*flag = 0;
|
||||
} else {
|
||||
snprintf(task_is_app_fname,sizeof(task_is_app_fname),
|
||||
"/proc/self/task/%ld/task_is_app",syscall(SYS_gettid));
|
||||
fd_task_is_app = fopen(task_is_app_fname, "r");
|
||||
if (fd_task_is_app != NULL) { /* okay we're in a PAGG container,
|
||||
and we are an app task (not just a process
|
||||
running on a mom node, for example), */
|
||||
*flag = 1;
|
||||
fclose(fd_task_is_app);
|
||||
} else {
|
||||
*flag = 0;
|
||||
}
|
||||
fclose(fd);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
32
orte/mca/common/alps/common_alps.h
Обычный файл
32
orte/mca/common/alps/common_alps.h
Обычный файл
@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
|
||||
* Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _COMMON_ALPS_H_
|
||||
#define _COMMON_ALPS_H_
|
||||
|
||||
#include "opal_config.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
 * Determine if calling process is in a Cray PAGG job container.
 *
 * @param flag  output; set to true when /proc/job exists and the calling
 *              thread has a task_is_app entry under /proc/self/task,
 *              otherwise set to false.
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_BAD_PARAM when flag is NULL.
 */
|
||||
OPAL_DECLSPEC int orte_common_alps_proc_in_pagg(bool *flag);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
40
orte/mca/common/alps/configure.m4
Обычный файл
40
orte/mca/common/alps/configure.m4
Обычный файл
@ -0,0 +1,40 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
|
||||
# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
|
||||
# Copyright (c) 2014 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_orte_common_alps_CONFIG([action-if-can-compile],
|
||||
# [action-if-cant-compile])
|
||||
# ------------------------------------------------
# Configure the ORTE common/alps library: registers its Makefile, runs
# ORTE_CHECK_ALPS with the "common_alps" prefix and, when that check
# reports success, runs action-if-can-compile and substitutes
# common_alps_CPPFLAGS, common_alps_LDFLAGS and common_alps_LIBS;
# otherwise runs action-if-cant-compile.
|
||||
AC_DEFUN([MCA_orte_common_alps_CONFIG],[
|
||||
AC_CONFIG_FILES([orte/mca/common/alps/Makefile])
|
||||
|
||||
ORTE_CHECK_ALPS([common_alps], [common_alps_happy="yes"], [common_alps_happy="no"])
|
||||
|
||||
AS_IF([test "$common_alps_happy" = "yes"],
|
||||
[$1
|
||||
AC_SUBST([common_alps_CPPFLAGS])
|
||||
AC_SUBST([common_alps_LDFLAGS])
|
||||
AC_SUBST([common_alps_LIBS])],
|
||||
[$2])
|
||||
#
|
||||
])dnl
|
51
orte/mca/ess/alps/Makefile.am
Обычный файл
51
orte/mca/ess/alps/Makefile.am
Обычный файл
@ -0,0 +1,51 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
sources = \
        ess_alps.h \
        ess_alps_component.c \
        ess_alps_module.c \
        ess_alps_utils.c

# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

if MCA_BUILD_orte_ess_alps_DSO
component_noinst =
component_install = mca_ess_alps.la
else
component_noinst = libmca_ess_alps.la
component_install =
endif

mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_ess_alps_la_SOURCES = $(sources)
mca_ess_alps_la_CPPFLAGS = $(ess_alps_CPPFLAGS) -fno-ident
mca_ess_alps_la_LDFLAGS = -module -avoid-version $(ess_alps_LDFLAGS)
# LIBADD takes the libraries to link; the linker flags are already passed
# through mca_ess_alps_la_LDFLAGS above.  This mirrors the static variant
# below, which also adds $(ess_alps_LIBS).
mca_ess_alps_la_LIBADD = $(ess_alps_LIBS) \
        $(ORTE_TOP_BUILDDIR)/orte/mca/common/alps/lib@ORTE_LIB_PREFIX@mca_common_alps.la

noinst_LTLIBRARIES = $(component_noinst)
libmca_ess_alps_la_SOURCES =$(sources)
libmca_ess_alps_la_CPPFLAGS = $(ess_alps_CPPFLAGS)
libmca_ess_alps_la_LDFLAGS = -module -avoid-version $(ess_alps_LDFLAGS)
libmca_ess_alps_la_LIBADD = $(ess_alps_LIBS)
|
||||
|
37
orte/mca/ess/alps/configure.m4
Обычный файл
37
orte/mca/ess/alps/configure.m4
Обычный файл
@ -0,0 +1,37 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_orte_ess_alps_CONFIG([action-if-found], [action-if-not-found])
# Configure the ORTE ess/alps component: registers its Makefile, runs
# ORTE_CHECK_ALPS with the "ess_alps" prefix and, when that check reports
# success, runs action-if-found and substitutes ess_alps_CPPFLAGS,
# ess_alps_LDFLAGS and ess_alps_LIBS; otherwise runs action-if-not-found.
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_ess_alps_CONFIG],[
|
||||
AC_CONFIG_FILES([orte/mca/ess/alps/Makefile])
|
||||
|
||||
ORTE_CHECK_ALPS([ess_alps], [ess_alps_happy="yes"], [ess_alps_happy="no"])
|
||||
|
||||
AS_IF([test "$ess_alps_happy" = "yes"],
|
||||
[$1
|
||||
AC_SUBST([ess_alps_CPPFLAGS])
|
||||
AC_SUBST([ess_alps_LDFLAGS])
|
||||
AC_SUBST([ess_alps_LIBS])],
|
||||
[$2])
|
||||
|
||||
])dnl
|
56
orte/mca/ess/alps/ess_alps.h
Обычный файл
56
orte/mca/ess/alps/ess_alps.h
Обычный файл
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef ORTE_ESS_ALPS_H
|
||||
#define ORTE_ESS_ALPS_H
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
|
||||
#include "alps/alps.h"
|
||||
#include "alps/alps_toolAssist.h"
|
||||
#include "alps/libalpsutil.h"
|
||||
#include "alps/libalpslli.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
 * Component open / close / query entry points; these are the functions
 * stored in the mca_ess_alps_component descriptor.
 */
|
||||
int orte_ess_alps_component_open(void);
|
||||
int orte_ess_alps_component_close(void);
|
||||
int orte_ess_alps_component_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
/*
 * alps component internal utility functions
 *
 * NOTE(review): their definitions are not visible from this header (the
 * component Makefile lists ess_alps_utils.c); the module init code calls
 * orte_ess_alps_sync_start to synchronize with aprun.
 */
|
||||
|
||||
int orte_ess_alps_get_first_rank_on_node(int *first_rank);
|
||||
int orte_ess_alps_sync_start(void);
|
||||
int orte_ess_alps_sync_complete(void);
|
||||
|
||||
/*
 * ESS alps module and component instances
 */
|
||||
extern orte_ess_base_module_t orte_ess_alps_module;
|
||||
ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_alps_component;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* ORTE_ESS_ALPS_H */
|
107
orte/mca/ess/alps/ess_alps_component.c
Обычный файл
107
orte/mca/ess/alps/ess_alps_component.c
Обычный файл
@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* These symbols are in a file by themselves to provide nice linker
|
||||
* semantics. Since linkers generally pull in symbols by object
|
||||
* files, keeping these symbols as the only symbols in this file
|
||||
* prevents utility programs such as "ompi_info" from having to import
|
||||
* entire components just to query their version and parameters.
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/mca/common/alps/common_alps.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/ess/base/base.h"
|
||||
#include "orte/mca/ess/alps/ess_alps.h"
|
||||
|
||||
#include <sys/syscall.h>
|
||||
|
||||
|
||||
/*
 * Public descriptor of the alps ess component: version and name
 * information followed by pointers to the component's open, close and
 * query functions, then the component metadata.
 */
|
||||
orte_ess_base_component_t mca_ess_alps_component = {
|
||||
/* First, the mca_component_t struct containing meta information
   about the component itself */
|
||||
{
|
||||
ORTE_ESS_BASE_VERSION_3_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
"alps",
|
||||
ORTE_MAJOR_VERSION,
|
||||
ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION,
|
||||
|
||||
/* Component open, close and query functions */
|
||||
orte_ess_alps_component_open,
|
||||
orte_ess_alps_component_close,
|
||||
orte_ess_alps_component_query
|
||||
},
|
||||
{
|
||||
/* The component is not checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_NONE
|
||||
}
|
||||
};
|
||||
|
||||
int
|
||||
orte_ess_alps_component_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Decide whether the alps ess module may be used by the calling process.
 *
 * module   - output; set to the alps ess module when the component is
 *            usable, NULL otherwise.
 * priority - output; set to 35 when the component is usable, 0 otherwise.
 *
 * Returns ORTE_SUCCESS when the module is offered.  Returns ORTE_ERROR
 * when the caller is an application process or is not running inside a
 * Cray PAGG container, or the error reported by
 * orte_common_alps_proc_in_pagg.  Both outputs are written on every path
 * so the caller never sees stale values.
 */
int orte_ess_alps_component_query(mca_base_module_t **module, int *priority)
{
    int rc;
    bool flag = false;

    /* decline by default; only the final success path offers the module */
    *priority = 0;
    *module = NULL;

    /*
     * don't use the alps ess component if an app proc
     */
    if (ORTE_PROC_IS_APP) {
        return ORTE_ERROR;
    }

    /*
     * make sure we're in a Cray PAGG container, and that we are also on
     * a compute node (i.e. we are thought of as an application task by
     * the cray job kernel module - the thing that creates the PAGG)
     */
    rc = orte_common_alps_proc_in_pagg(&flag);
    if (ORTE_SUCCESS != rc) {
        return rc;
    }
    if (!flag) {
        /* not in a PAGG container: decline the same way as for app procs */
        return ORTE_ERROR;
    }

    *priority = 35; /* take precedence over base */
    *module = (mca_base_module_t *) &orte_ess_alps_module;

    return ORTE_SUCCESS;
}
|
||||
|
||||
int
|
||||
orte_ess_alps_component_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
229
orte/mca/ess/alps/ess_alps_module.c
Обычный файл
229
orte/mca/ess/alps/ess_alps_module.c
Обычный файл
@ -0,0 +1,229 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/util/argv.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/mca/errmgr/base/base.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/nidmap.h"
|
||||
#include "orte/util/regex.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/ess/base/base.h"
|
||||
#include "orte/mca/ess/alps/ess_alps.h"
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
/* Forward declarations of the file-local functions used below */
static int alps_set_name(void);
|
||||
static int rte_init(void);
|
||||
static int rte_finalize(void);
|
||||
|
||||
/*
 * Function table exported as the alps ess module; the entries are
 * positional.
 * NOTE(review): the slot meanings (init, finalize, abort, ft_event) are
 * inferred from the function names and the ft_event comment - confirm
 * against the orte_ess_base_module_t definition.
 */
orte_ess_base_module_t orte_ess_alps_module = {
|
||||
rte_init,
|
||||
rte_finalize,
|
||||
orte_ess_base_app_abort,
|
||||
NULL /* ft_event */
|
||||
};
|
||||
|
||||
/* Local variables */
|
||||
/* Base vpid; rte_init adds this daemon's index in the node list to it
   when it corrects the daemon's vpid */
static orte_vpid_t starting_vpid = 0;
|
||||
|
||||
|
||||
static int rte_init(void)
|
||||
{
|
||||
int ret, i;
|
||||
char *error = NULL;
|
||||
char **hosts = NULL;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
|
||||
"ess:alps in rte_init"));
|
||||
|
||||
/*
|
||||
* shouldn't have been able to open this ess component if
|
||||
* process is app proc
|
||||
*/
|
||||
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
error = "mpi rank invoking alps rte_init";
|
||||
ret = ORTE_ERR_NOT_SUPPORTED;
|
||||
goto fn_fail;
|
||||
}
|
||||
|
||||
/* run the prolog */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
|
||||
error = "orte_ess_base_std_prolog";
|
||||
goto fn_fail;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (ret = alps_set_name())) {
|
||||
error = "alps_set_name";
|
||||
goto fn_fail;
|
||||
}
|
||||
|
||||
/*
|
||||
* if I am a daemon, complete my setup using the
|
||||
* default procedure
|
||||
*/
|
||||
if (ORTE_PROC_IS_DAEMON) {
|
||||
if (NULL != orte_node_regex) {
|
||||
/* extract the nodes */
|
||||
if (ORTE_SUCCESS != (ret =
|
||||
orte_regex_extract_node_names(orte_node_regex, &hosts)) ||
|
||||
NULL == hosts) {
|
||||
error = "orte_regex_extract_node_names";
|
||||
goto fn_fail;
|
||||
}
|
||||
|
||||
/* find our host in the list */
|
||||
for (i=0; NULL != hosts[i]; i++) {
|
||||
if (0 == strncmp(hosts[i], orte_process_info.nodename,
|
||||
strlen(hosts[i]))) {
|
||||
/* correct our vpid - this is probably not necessary with aprun*/
|
||||
ORTE_PROC_MY_NAME->vpid = starting_vpid + i;
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
|
||||
"ess:alps reset name to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_ess_base_orted_setup";
|
||||
goto fn_fail;
|
||||
}
|
||||
if (NULL != hosts) {
|
||||
opal_argv_free(hosts);
|
||||
}
|
||||
|
||||
/*
|
||||
* now synchronize with aprun.
|
||||
*/
|
||||
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_alps_sync_start())) {
|
||||
error = "orte_ess_alps_sync";
|
||||
goto fn_fail;
|
||||
}
|
||||
|
||||
ret = ORTE_SUCCESS;
|
||||
goto fn_exit;
|
||||
}
|
||||
|
||||
if (ORTE_PROC_IS_TOOL) {
|
||||
/* otherwise, if I am a tool proc, use that procedure */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_ess_base_tool_setup";
|
||||
goto fn_fail;
|
||||
}
|
||||
/* as a tool, I don't need a nidmap - so just return now */
|
||||
ret = ORTE_SUCCESS;
|
||||
goto fn_exit;
|
||||
}
|
||||
|
||||
fn_exit:
|
||||
return ret;
|
||||
|
||||
fn_fail:
|
||||
if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
|
||||
orte_show_help("help-orte-runtime.txt",
|
||||
"orte_init:startup:internal-failure",
|
||||
true, error, ORTE_ERROR_NAME(ret), ret);
|
||||
}
|
||||
goto fn_exit;
|
||||
}
|
||||
|
||||
static int rte_finalize(void)
|
||||
{
|
||||
int ret = ORTE_SUCCESS;
|
||||
|
||||
/* if I am a daemon, finalize using the default procedure */
|
||||
if (ORTE_PROC_IS_DAEMON) {
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
goto fn_exit;
|
||||
}
|
||||
|
||||
/* notify alps that we're done */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_alps_sync_complete())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
}
|
||||
|
||||
} else if (ORTE_PROC_IS_TOOL) {
|
||||
/* otherwise, if I am a tool proc, use that procedure */
|
||||
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
}
|
||||
}
|
||||
|
||||
fn_exit:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int alps_set_name(void)
|
||||
{
|
||||
int rc;
|
||||
int rank;
|
||||
orte_jobid_t jobid;
|
||||
|
||||
if (NULL == orte_ess_base_jobid) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (NULL == orte_ess_base_vpid) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&starting_vpid,
|
||||
orte_ess_base_vpid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return(rc);
|
||||
}
|
||||
|
||||
ORTE_PROC_MY_NAME->jobid = jobid;
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_ess_alps_get_first_rank_on_node(&rank))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return(rc);
|
||||
}
|
||||
|
||||
ORTE_PROC_MY_NAME->vpid = (orte_vpid_t)rank + starting_vpid;
|
||||
|
||||
/* get the num procs as provided in the cmd line param */
|
||||
if (ORTE_SUCCESS != (rc = orte_ess_env_get())) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
241
orte/mca/ess/alps/ess_alps_utils.c
Обычный файл
241
orte/mca/ess/alps/ess_alps_utils.c
Обычный файл
@ -0,0 +1,241 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/util/argv.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/mca/errmgr/base/base.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/nidmap.h"
|
||||
#include "orte/util/regex.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/ess/base/base.h"
|
||||
#include "orte/mca/ess/alps/ess_alps.h"
|
||||
|
||||
/*
|
||||
* use the Alps placement file to obtain
|
||||
* the global rank of the "first" local rank
|
||||
* on the node.
|
||||
*/
|
||||
|
||||
int
|
||||
orte_ess_alps_get_first_rank_on_node(int *first_rank)
|
||||
{
|
||||
int alps_status = 0;
|
||||
uint64_t apid;
|
||||
size_t alps_count;
|
||||
int ret = ORTE_SUCCESS;
|
||||
int lli_ret = 0, place_ret;
|
||||
alpsAppLayout_t orted_layout;
|
||||
|
||||
if (first_rank == NULL) {
|
||||
ret = ORTE_ERR_BAD_PARAM;
|
||||
goto fn_exit;
|
||||
}
|
||||
|
||||
/*
|
||||
* First get our apid
|
||||
*/
|
||||
|
||||
lli_ret = alps_app_lli_lock();
|
||||
if (0 != ret) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_app_lli_lock returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
|
||||
ret = ORTE_ERR_FILE_WRITE_FAILURE;
|
||||
goto fn_exit;
|
||||
}
|
||||
|
||||
lli_ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_APID, NULL, 0);
|
||||
if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_app_lli_put_request - APID returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
|
||||
ret = ORTE_ERR_FILE_WRITE_FAILURE;
|
||||
goto fn_exit_w_lock;
|
||||
}
|
||||
|
||||
lli_ret = alps_app_lli_get_response (&alps_status, &alps_count);
|
||||
if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_app_lli_get_response returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
|
||||
ret = ORTE_ERR_FILE_READ_FAILURE;
|
||||
goto fn_exit_w_lock;
|
||||
}
|
||||
|
||||
lli_ret = alps_app_lli_get_response_bytes (&apid, sizeof(apid));
|
||||
if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_app_lli_get_response_bytes returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
|
||||
ret = ORTE_ERR_FILE_READ_FAILURE;
|
||||
goto fn_exit_w_lock;
|
||||
}
|
||||
|
||||
place_ret = alps_get_placement_info(apid,
|
||||
&orted_layout,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
if (1 != place_ret) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_get_placement_info returned %d (%s)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), place_ret, strerror(errno)));
|
||||
ret = ORTE_ERROR;
|
||||
goto fn_exit;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_get_placement_info returned %d first pe on node is %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), place_ret, orted_layout.firstPe));
|
||||
*first_rank = orted_layout.firstPe;
|
||||
|
||||
fn_exit_w_lock:
|
||||
lli_ret = alps_app_lli_unlock();
|
||||
if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_app_lli_unlock returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
|
||||
ret = ORTE_ERR_FILE_WRITE_FAILURE;
|
||||
}
|
||||
|
||||
fn_exit:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Function to check in with apshepherd to say we are a parallel application
|
||||
*/
|
||||
int
|
||||
orte_ess_alps_sync_start(void)
|
||||
{
|
||||
int ret = ORTE_SUCCESS;
|
||||
int lli_ret = 0;
|
||||
int alps_status = 0;
|
||||
size_t alps_count;
|
||||
|
||||
lli_ret = alps_app_lli_lock();
|
||||
if (0 != ret) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_app_lli_lock returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
|
||||
ret = ORTE_ERR_FILE_WRITE_FAILURE;
|
||||
goto fn_exit;
|
||||
}
|
||||
|
||||
lli_ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_START, NULL, 0);
|
||||
if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_app_lli_put_request returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
|
||||
ret = ORTE_ERR_FILE_WRITE_FAILURE;
|
||||
goto fn_exit_w_lock;
|
||||
}
|
||||
|
||||
lli_ret = alps_app_lli_get_response (&alps_status, &alps_count);
|
||||
if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_app_lli_get_response returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
|
||||
ret = ORTE_ERR_FILE_READ_FAILURE;
|
||||
goto fn_exit_w_lock;
|
||||
}
|
||||
|
||||
fn_exit_w_lock:
|
||||
lli_ret = alps_app_lli_unlock();
|
||||
if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_app_lli_unlock returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
|
||||
ret = ORTE_ERR_FILE_WRITE_FAILURE;
|
||||
}
|
||||
|
||||
fn_exit:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Function to check in with apshepherd to say we are a parallel application
|
||||
*/
|
||||
|
||||
int
|
||||
orte_ess_alps_sync_complete(void)
|
||||
{
|
||||
int ret = ORTE_SUCCESS;
|
||||
int lli_ret = 0;
|
||||
int alps_status = 0;
|
||||
size_t alps_count;
|
||||
|
||||
lli_ret = alps_app_lli_lock();
|
||||
if (0 != ret) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_app_lli_lock returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
|
||||
ret = ORTE_ERR_FILE_WRITE_FAILURE;
|
||||
goto fn_exit;
|
||||
}
|
||||
|
||||
lli_ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_EXITING, NULL, 0);
|
||||
if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_app_lli_put_request returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
|
||||
ret = ORTE_ERR_FILE_WRITE_FAILURE;
|
||||
goto fn_exit_w_lock;
|
||||
}
|
||||
|
||||
lli_ret = alps_app_lli_get_response (&alps_status, &alps_count);
|
||||
if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_app_lli_get_response returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
|
||||
ret = ORTE_ERR_FILE_READ_FAILURE;
|
||||
goto fn_exit_w_lock;
|
||||
}
|
||||
|
||||
fn_exit_w_lock:
|
||||
lli_ret = alps_app_lli_unlock();
|
||||
if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
|
||||
"%s ess:alps: alps_app_lli_unlock returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
|
||||
ret = ORTE_ERR_FILE_WRITE_FAILURE;
|
||||
}
|
||||
|
||||
fn_exit:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -44,7 +44,8 @@ mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_odls_alps_la_SOURCES = $(sources)
|
||||
mca_odls_alps_la_CPPFLAGS = $(odls_alps_CPPFLAGS)
|
||||
mca_odls_alps_la_LDFLAGS = -module -avoid-version $(odls_alps_LDFLAGS)
|
||||
mca_odls_alps_la_LIBADD = $(odls_alps_LIBS)
|
||||
mca_odls_alps_la_LIBADD = $(odls_alps_LIBS) \
|
||||
$(ORTE_TOP_BUILDDIR)/orte/mca/common/alps/lib@ORTE_LIB_PREFIX@mca_common_alps.la
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_odls_alps_la_SOURCES =$(sources)
|
||||
|
@ -25,29 +25,13 @@
|
||||
AC_DEFUN([MCA_orte_odls_alps_CONFIG],[
|
||||
AC_CONFIG_FILES([orte/mca/odls/alps/Makefile])
|
||||
|
||||
odls_alps_lli_happy="no"
|
||||
odls_alps_util_happy="no"
|
||||
ORTE_CHECK_ALPS([odls_alps], [odls_alps_happy="yes"], [odls_alps_happy="no"])
|
||||
|
||||
PKG_CHECK_MODULES([CRAY_ALPS_LLI], [cray-alpslli],
|
||||
[odls_alps_CPPFLAGS=$CRAY_ALPS_LLI_CFLAGS
|
||||
odls_alps_LDFLAGS=$CRAY_ALPS_LLI_LIBS
|
||||
odls_alps_LIBS=$CRAY_ALPS_LLI_LIBS
|
||||
odls_alps_lli_happy="yes"],
|
||||
[AC_MSG_RESULT([no])])
|
||||
|
||||
PKG_CHECK_MODULES([CRAY_ALPS_UTIL], [cray-alpsutil],
|
||||
[odls_alps_CPPFLAGS="$odls_alps_CPPFLAGS $CRAY_ALPS_UTIL_CFLAGS"
|
||||
odls_alps_LDFLAGS="$odls_alps_LDFLAGS $CRAY_ALPS_UTIL_LIBS"
|
||||
odls_alps_LIBS="$odls_alps_LIBS $CRAY_ALPS_LLI_LIBS"
|
||||
odls_alps_util_happy="yes"],
|
||||
[AC_MSG_RESULT([no])])
|
||||
|
||||
AS_IF([test "$odls_alps_lli_happy" = "yes" -a "$odls_alps_util_happy" = "yes"],
|
||||
AS_IF([test "$odls_alps_happy" = "yes"],
|
||||
[$1
|
||||
AC_SUBST([odls_alps_CPPFLAGS])
|
||||
AC_SUBST([odls_alps_LDFLAGS])
|
||||
AC_SUBST([odls_alps_LIBS])],
|
||||
[$2])
|
||||
|
||||
])dnl
|
||||
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
|
||||
#include "orte/mca/common/alps/common_alps.h"
|
||||
#include "orte/mca/odls/odls.h"
|
||||
#include "orte/mca/odls/base/odls_private.h"
|
||||
#include "orte/mca/odls/alps/odls_alps.h"
|
||||
@ -77,9 +78,7 @@ int orte_odls_alps_component_open(void)
|
||||
int orte_odls_alps_component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
int rc = ORTE_SUCCESS;
|
||||
const char proc_job_file[]="/proc/job";
|
||||
FILE *fd = NULL, *fd_task_is_app = NULL;
|
||||
char task_is_app_fname[PATH_MAX];
|
||||
bool flag;
|
||||
|
||||
/*
|
||||
* make sure we're in a daemon process
|
||||
@ -97,26 +96,10 @@ int orte_odls_alps_component_query(mca_base_module_t **module, int *priority)
|
||||
* the cray job kernel module - the thing that creates the PAGG
|
||||
*/
|
||||
|
||||
/* disqualify ourselves if not running in a Cray PAGG container */
|
||||
fd = fopen(proc_job_file, "r");
|
||||
if (fd == NULL) {
|
||||
*priority = 0;
|
||||
*module = NULL;
|
||||
rc = ORTE_ERROR;
|
||||
} else {
|
||||
snprintf(task_is_app_fname,sizeof(task_is_app_fname),
|
||||
"/proc/self/task/%ld/task_is_app",syscall(SYS_gettid));
|
||||
fd_task_is_app = fopen(task_is_app_fname, "r");
|
||||
if (fd_task_is_app != NULL) { /* okay we're in a PAGG container,
|
||||
and we are an app task (not just a process
|
||||
running on a mom node, for example),
|
||||
so we should give cray pmi a shot. */
|
||||
*priority = 10; /* take precendence over base */
|
||||
*module = (mca_base_module_t *) &orte_odls_alps_module;
|
||||
fclose(fd_task_is_app);
|
||||
rc = orte_odls_alps_get_rdma_creds();
|
||||
}
|
||||
fclose(fd);
|
||||
rc = orte_common_alps_proc_in_pagg(&flag);
|
||||
if ((ORTE_SUCCESS == rc) && flag) {
|
||||
*priority = 10; /* take precendence over base */
|
||||
*module = (mca_base_module_t *) &orte_odls_alps_module;
|
||||
}
|
||||
|
||||
return rc;
|
||||
|
@ -740,6 +740,15 @@ int orte_odls_alps_launch_local_procs(opal_buffer_t *data)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* get the RDMA credentials and push them into the launch environment */
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_odls_alps_get_rdma_creds())) {;
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output,
|
||||
"%s odls:alps:launch:failed to get GNI rdma credentials %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* launch the local procs */
|
||||
ORTE_ACTIVATE_LOCAL_LAUNCH(job, odls_alps_fork_local_proc);
|
||||
|
||||
|
@ -231,12 +231,6 @@ int orte_odls_alps_get_rdma_creds(void)
|
||||
goto fn_exit;
|
||||
}
|
||||
|
||||
#if 0
|
||||
fprintf(stderr,"apid = 0x%lx ptag0 %d cookie0 0x%x(%d) ptag1 %d cookie1 0x%x(%d)\n",apid,
|
||||
rdmacred_buf[0].ptag,rdmacred_buf[0].cookie,rdmacred_buf[0].cookie,
|
||||
rdmacred_buf[1].ptag,rdmacred_buf[1].cookie,rdmacred_buf[1].cookie);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
fn_exit:
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user