1
1

Merge pull request #289 from hppritcha/topic/remove_pmi

Topic/remove pmi
Этот коммит содержится в:
Howard Pritchard 2014-12-03 16:58:35 -07:00
родитель f989fe27b8 c75dccede1
Коммит c67afadcfc
20 изменённых файлов: 1107 добавлений и 143 удалений

Просмотреть файл

@ -107,3 +107,6 @@ libmca_common_verbs_so_version=0:0:0
# OPAL layer
libmca_opal_common_pmi_so_version=0:0:0
# ORTE layer
libmca_common_alps_so_version=0:0:0

Просмотреть файл

@ -17,112 +17,148 @@
#
# $HEADER$
#
# ORTE_CHECK_ALPS_CLE4([action-if-found], [action-if-not-found])
# --------------------------------------------------------
# Locate the ALPS static library and headers on a CLE 4 system.
#
# Reads:  with_alps (yes, auto, or an install prefix) and with_alps_libdir.
# Sets:   orte_check_alps_dir and orte_check_alps_libdir; when both
#         libalps.a and alps/apInfo.h are found it also sets
#         CRAY_ALPSLLI_CFLAGS, CRAY_ALPSLLI_LIBS and CRAY_ALPSLLI_STATIC_LIBS.
# Runs action-if-found only when both the library and the header exist,
# otherwise action-if-not-found.
AC_DEFUN([ORTE_CHECK_ALPS_CLE4],[

#
#   if we have gotten here, it is because we are building on a CLE 4 system
#
    orte_check_alps_libdir_cle4_happy="no"
    orte_check_alps_dir_cle4_happy="no"

    AC_MSG_CHECKING([Checking for ALPS components on a CLE 4 system with alps $with_alps])

    AC_ARG_WITH([alps-libdir],
                [AC_HELP_STRING([--with-alps-libdir=DIR],
                [Location of alps libraries (alpslli, alpsutil) (default: /usr/lib/alps (/opt/cray/xe-sysroot/default/usr/lib/alps on eslogin nodes))])])

#
#   check to see if Open MPI is being built on a CLE 4 eslogin node
#
    AS_IF([test -f /etc/opt/cray/release/ESLrelease],
          [default_alps_dir="/opt/cray/xe-sysroot/default/usr"],
          [default_alps_dir="/usr"])

#
#   An explicitly given library directory always wins.  Without one, the
#   directory is derived from the alps install prefix when a prefix was
#   given, and from the system default otherwise.
#
    AS_IF([test -n "$with_alps_libdir"],
          [orte_check_alps_libdir="$with_alps_libdir"],
          [AS_IF([test "$with_alps" != "yes" -a "$with_alps" != "auto"],
                 [AS_IF([test -d "$with_alps/lib64"],
                        [orte_check_alps_libdir="$with_alps/lib64"],
                        [orte_check_alps_libdir="$with_alps/lib"])],
                 [orte_check_alps_libdir="$default_alps_dir/lib/alps"])])

    AS_IF([test "$with_alps" = "yes" -o "$with_alps" = "auto"],
          [orte_check_alps_dir=$default_alps_dir],
          [orte_check_alps_dir=$with_alps])

#
#   Each check reports its outcome, for the found and the not found case.
#
    AC_MSG_CHECKING([if $orte_check_alps_libdir/libalps.a is present])
    AS_IF([test -f "$orte_check_alps_libdir/libalps.a"],
          [orte_check_alps_libdir_cle4_happy="yes"],
          [orte_check_alps_libdir_cle4_happy="no"])
    AC_MSG_RESULT([$orte_check_alps_libdir_cle4_happy])

    AC_MSG_CHECKING([if $orte_check_alps_dir/include/alps/apInfo.h is present])
    AS_IF([test -f "$orte_check_alps_dir/include/alps/apInfo.h"],
          [orte_check_alps_dir_cle4_happy="yes"],
          [orte_check_alps_dir_cle4_happy="no"])
    AC_MSG_RESULT([$orte_check_alps_dir_cle4_happy])

    AS_IF([test "$orte_check_alps_libdir_cle4_happy" = "yes" -a "$orte_check_alps_dir_cle4_happy" = "yes"],
          [CRAY_ALPSLLI_CFLAGS="-I$orte_check_alps_dir/include"
           CRAY_ALPSLLI_LIBS="-L$orte_check_alps_libdir -lalpslli -lalpsutil"
           CRAY_ALPSLLI_STATIC_LIBS="-L$orte_check_alps_libdir -lalpslli -lalpsutil"
           $1],
          [$2])
])
# ORTE_CHECK_ALPS(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
AC_DEFUN([ORTE_CHECK_ALPS],[
if test -z "$orte_check_alps_happy"; then
# require that we check for pmi support request first so
# we can get the static library ordering correct
AC_REQUIRE([OPAL_CHECK_CRAY_PMI])
AC_ARG_WITH([alps],
[AC_HELP_STRING([--with-alps(=DIR|yes|no)],
[Build with ALPS scheduler component, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries (default: no)])])
OPAL_CHECK_WITHDIR([alps], [$with_alps], [.])
[Build with ALPS scheduler component, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries (default: auto)])],[],with_alps=auto)
AC_ARG_WITH([alps-libdir],
[AC_HELP_STRING([--with-alps-libdir=DIR],
[Location of alps libraries (alpslli, alpsutil) (default: /usr/lib/alps)])])
AC_ARG_WITH([wlm_detect],
[AC_HELP_STRING([--with-wlm_detect(=DIR)],
[Location of wlm_detect library needed by PMI on CLE 5 systems (default: /opt/cray/wlm_detect/default)])])
# save the CPPFLAGS so we can check for alps/apInfo.h without adding $with_alps/include to the global path
orte_check_alps_$1_save_CPPFLAGS="$CPPFLAGS"
#
# check to see where alps is installed, it wandered to a new location starting with CLE 5.0
#
if test -f "/usr/lib/alps/libalps.a" ; then
using_cle5_install="no"
if test -f /etc/opt/cray/release/clerelease; then
cle_level=`awk -F. '{print [$]1}' /etc/opt/cray/release/clerelease`
else
using_cle5_install="yes"
if test -z "$with_wlm_detect" ; then
with_wlm_detect="/opt/cray/wlm_detect/default"
fi
# libpmi requires libugni for static linking on CLE 5. WTH!
OPAL_CHECK_UGNI($1,[orte_check_alps_happy=yes],[orte_check_alps_happy=no])
cle_level="unknown"
fi
if test "$with_alps" = "no" -o -z "$with_alps" ; then
orte_check_alps_happy="no"
else
# Only need to do these tests once - this macro is invoked
# from multiple different components' configure.m4 scripts
AC_MSG_CHECKING([for ALPS support cle level $cle_level])
AS_IF([test "$cle_level" = "4" -a "$with_alps" != "no"],
[ORTE_CHECK_ALPS_CLE4([orte_check_cray_alps_happy="yes"],
[orte_check_cray_alps_happy="no"])],
[AS_IF([test "$with_alps" = "no"],
[AC_MSG_RESULT([no])
$3],
[AS_IF([test "$with_alps" = "auto" -o "$with_alps" = "yes"],
[PKG_CHECK_MODULES_STATIC([CRAY_ALPSLLI], [cray-alpslli],
[orte_check_cray_alps_happy="yes"],
[orte_check_cray_alps_happy="no"]
[AS_IF([test "$with_alps" = "yes"],
[AC_MSG_WARN([ALPS support requested but pkg-config failed.])
AC_MSG_WARN([Need to explicitly indicate ALPS directory])
AC_MSG_WARN([on the configure line using --with-alps option.])
AC_MSG_ERROR([Aborting])],[])]
)
PKG_CHECK_MODULES_STATIC([CRAY_ALPSUTIL], [cray-alpsutil],
[orte_check_cray_alps_happy="yes"],
[orte_check_cray_alps_happy="no"]
[AS_IF([test "$with_alps" = "yes"],
[AC_MSG_WARN([ALPS support requested but pkg-config failed.])
AC_MSG_WARN([Need to explicitly indicate ALPS directory])
AC_MSG_WARN([on the configure line using --with-alps option.])
AC_MSG_ERROR([Aborting])],[])]
)
orte_check_alps_happy="yes"
orte_check_alps_libdir="$with_alps_libdir"
if test -z "$orte_check_alps_libdir" ; then
if test "$with_alps" != "yes" ; then
AS_IF([test -d "$with_alps/lib64"],
[orte_check_alps_libdir="$with_alps/lib64"],
[orte_check_alps_libdir="$with_alps/lib"])
else
if test "$using_cle5_install" = "yes"; then
orte_check_alps_libdir="/opt/cray/alps/default/lib64"
else
orte_check_alps_libdir="/usr/lib/alps"
fi
fi
fi
PKG_CHECK_MODULES_STATIC([CRAY_ALPS], [cray-alps],
[orte_check_cray_alps_happy="yes"],
[orte_check_cray_alps_happy="no"]
[AS_IF([test "$with_alps" = "yes"],
[AC_MSG_WARN([ALPS support requested but pkg-config failed.])
AC_MSG_WARN([Need to explicitly indicate ALPS directory])
AC_MSG_WARN([on the configure line using --with-alps option.])
AC_MSG_ERROR([Aborting])],[])]
)
if test "$with_alps" = "yes" ; then
AS_IF([test "$using_cle5_install" = "yes"],
[orte_check_alps_dir="/opt/cray/alps/default"],
[orte_check_alps_dir="/usr"])
else
orte_check_alps_dir="$with_alps"
fi
],
[AC_MSG_WARN([See ./configure --help for how to control Open MPI])
AC_MSG_WARN([configuration for ALPS on CLE 5 and higher systems])
AC_MSG_ERROR([Aborting])])
])
])
if test -z "$orte_check_alps_pmi_happy"; then
# if pmi support is requested, then OPAL_CHECK_PMI
# will have added the -lpmi flag to LIBS. We then need
# to add a couple of alps libs to support static
# builds
if test "$opal_enable_pmi" = 1 ; then
AC_MSG_CHECKING([for alps libraries in "$orte_check_alps_libdir"])
AC_MSG_RESULT([orte_check_cray_alps_happy = $orte_check_cray_alps_happy])
# For static builds switch the link lines over to the *_STATIC_LIBS values.
# A shell assignment must not have blanks around the equals sign, and the
# values are quoted because they can hold several words.
AS_IF([test "$orte_check_cray_alps_happy" = "yes" -a "$enable_static" = "yes"],
[CRAY_ALPSLLI_LIBS="$CRAY_ALPSLLI_STATIC_LIBS"
CRAY_ALPSUTIL_LIBS="$CRAY_ALPSUTIL_STATIC_LIBS"],
[])
AC_MSG_RESULT([CRAY_ALPSLLI_STATIC_LIBS - $CRAY_ALPSLLI_STATIC_LIBS])
AC_MSG_RESULT([CRAY_ALPSLLI_LIBS - $CRAY_ALPSLLI_LIBS])
AC_MSG_RESULT([CRAY_ALPSLLI_CFLAGS - $CRAY_ALPSLLI_CFLAGS])
AC_MSG_RESULT([CRAY_ALPSUTIL_STATIC_LIBS - $CRAY_ALPSUTIL_STATIC_LIBS])
AC_MSG_RESULT([CRAY_ALPSUTIL_LIBS - $CRAY_ALPSUTIL_LIBS])
AC_MSG_RESULT([CRAY_ALPSUTIL_CFLAGS - $CRAY_ALPSUTIL_CFLAGS])
AC_MSG_RESULT([CRAY_ALPS_CFLAGS - $CRAY_ALPS_CFLAGS])
AS_IF([test "$orte_check_cray_alps_happy" = "yes"],
[$1_LDFLAGS="$CRAY_ALPSLLI_LIBS $CRAY_ALPSUTIL_LIBS"
$1_CPPFLAGS="$CRAY_ALPSLLI_CFLAGS $CRAY_ALPSUTIL_CFLAGS $CRAY_ALPS_CFLAGS"
$1_LIBS="$CRAY_ALPSLLI_LIBS $CRAY_ALPSUTIL_LIBS"],
[])
# libalpslli and libalpsutil are needed by libpmi to compile statically
AS_IF([test -f "$orte_check_alps_libdir/libalpslli.a" -a -f "$orte_check_alps_libdir/libalpsutil.a"],
[AC_MSG_RESULT([found])
orte_check_alps_pmi_happy=yes],
[AC_MSG_WARN([PMI support for Alps requested but not found])
AC_MSG_ERROR([Cannot continue])])
fi
fi
fi
fi
# Set LIBS, CPPFLAGS, and LDFLAGS here so they always get set
if test "$orte_check_alps_happy" = "yes" -a "$opal_enable_pmi" = 1 ; then
$1_LIBS="-lalpslli -lalpsutil"
fi
$1_CPPFLAGS="-I$orte_check_alps_dir/include"
$1_LDFLAGS="-L$orte_check_alps_libdir"
# Add CLE 5 library dependencies
if test "using_cle5_install" = "yes" ; then
$1_LIBS="$$1_LIBS -lwlm_detect"
$1_LDFLAGS="$$1_LDFLAGS -L$with_wlm_detect"
fi
AS_IF([test "$orte_check_alps_happy" = "yes"],
[$2],
[$3])
AS_IF([test "$orte_check_cray_alps_happy" = "yes"],
[$2], [$3])
])

Просмотреть файл

@ -157,6 +157,7 @@ AC_SUBST(libmca_common_ofacm_so_version)
AC_SUBST(libmca_common_sm_so_version)
AC_SUBST(libmca_common_ugni_so_version)
AC_SUBST(libmca_common_verbs_so_version)
AC_SUBST(libmca_common_alps_so_version)
#
# Get the versions of the autotools that were used to bootstrap us

Просмотреть файл

@ -105,10 +105,6 @@ static int pmix_cray_component_query(mca_base_module_t **module, int *priority)
/*
 * Component close function: as written, it calls the module's finalize
 * entry point and returns the status that call produced.
 */
static int pmix_cray_component_close(void)
{
int ret = OPAL_SUCCESS;
ret = opal_pmix_cray_module.finalize();
return ret;
/* NOTE(review): unreachable -- the function has already returned above.
 * This line looks like the replacement body from the diff this listing
 * was taken from; confirm which of the two bodies is intended. */
return OPAL_SUCCESS;
}

25
orte/mca/common/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,25 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Note that this file must exist, even though it is empty (there is no
# "base" directory for the common framework). autogen.pl and
# opal_mca.m4 assume that every framework has a top-level Makefile.am.
# We *could* adjust the framework glue code to exclude "common" from
# this requirement, but it's just a lot easier to have an empty
# Makefile.am here.

66
orte/mca/common/alps/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,66 @@
#
# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
# Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
AM_CPPFLAGS = $(common_alps_CPPFLAGS)
headers = \
common_alps.h
sources = \
common_alps.c
lib_LTLIBRARIES =
noinst_LTLIBRARIES =
comp_inst = lib@ORTE_LIB_PREFIX@mca_common_alps.la
comp_noinst = lib@ORTE_LIB_PREFIX@mca_common_alps_noinst.la
if MCA_BUILD_orte_common_alps_DSO
lib_LTLIBRARIES += $(comp_inst)
else
noinst_LTLIBRARIES += $(comp_noinst)
endif
lib@ORTE_LIB_PREFIX@mca_common_alps_la_SOURCES = $(headers) $(sources)
lib@ORTE_LIB_PREFIX@mca_common_alps_la_CPPFLAGS = $(common_alps_CPPFLAGS)
lib@ORTE_LIB_PREFIX@mca_common_alps_la_LDFLAGS = \
-version-info $(libmca_common_alps_so_version) \
$(common_alps_LDFLAGS)
lib@ORTE_LIB_PREFIX@mca_common_alps_la_LIBADD = $(common_alps_LIBS)
lib@ORTE_LIB_PREFIX@mca_common_alps_noinst_la_SOURCES = $(headers) $(sources)
# Conditionally install the header files.  This is an ORTE component, so
# the headers go under the ORTE include tree in the directory that mirrors
# this source directory.  The install directory variable must name a
# directory, not the header file itself.
if WANT_INSTALL_HEADERS
ortedir = $(orteincludedir)/$(subdir)
orte_HEADERS = $(headers)
else
ortedir = $(includedir)
endif
# These two rules manage a symlink from the installable libtool library
# filename to the "noinst" libtool library filename.  They only act when
# lib_LTLIBRARIES is empty: all-local creates the link and clean-local
# removes it.
V=0
OMPI_V_LN_SCOMP = $(ompi__v_LN_SCOMP_$V)
ompi__v_LN_SCOMP_ = $(ompi__v_LN_SCOMP_$AM_DEFAULT_VERBOSITY)
ompi__v_LN_SCOMP_0 = @echo " LN_S " `basename $(comp_inst)`;
all-local:
$(OMPI_V_LN_SCOMP) if test -z "$(lib_LTLIBRARIES)"; then \
rm -f "$(comp_inst)"; \
$(LN_S) "$(comp_noinst)" "$(comp_inst)"; \
fi
clean-local:
if test -z "$(lib_LTLIBRARIES)"; then \
rm -f "$(comp_inst)"; \
fi

73
orte/mca/common/alps/common_alps.c Обычный файл
Просмотреть файл

@ -0,0 +1,73 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/
#include "opal/types.h"
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/mca/common/alps/common_alps.h"
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
/*
 * Determine whether or not the calling process is in a Cray PAGG container.
 *
 * *flag is set to true only when /proc/job can be opened and the
 * task_is_app entry of the calling thread (under /proc/self/task) can be
 * opened as well; in every other case it is set to false.
 *
 * Returns ORTE_ERR_BAD_PARAM when flag is NULL, ORTE_SUCCESS otherwise.
 */
int orte_common_alps_proc_in_pagg(bool *flag)
{
    const char job_path[] = "/proc/job";
    char app_marker_path[PATH_MAX];
    FILE *job_fp;
    FILE *app_marker_fp;

    if (NULL == flag) {
        return ORTE_ERR_BAD_PARAM;
    }

    job_fp = fopen(job_path, "r");
    if (NULL == job_fp) {
        /* no job file at all: not in a container */
        *flag = 0;
        return ORTE_SUCCESS;
    }

    /* in a container; are we also an app task (not just a process
       running on a mom node, for example)? */
    snprintf(app_marker_path, sizeof(app_marker_path),
             "/proc/self/task/%ld/task_is_app", syscall(SYS_gettid));
    app_marker_fp = fopen(app_marker_path, "r");

    *flag = (NULL != app_marker_fp);
    if (NULL != app_marker_fp) {
        fclose(app_marker_fp);
    }
    fclose(job_fp);

    return ORTE_SUCCESS;
}

32
orte/mca/common/alps/common_alps.h Обычный файл
Просмотреть файл

@ -0,0 +1,32 @@
/*
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* All rights reserved.
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef _COMMON_ALPS_H_
#define _COMMON_ALPS_H_
/* This header belongs to the ORTE layer, so it pulls in the ORTE
   configuration header and exports its symbol with the ORTE macro. */
#include "orte_config.h"
BEGIN_C_DECLS
/**
 * Determine if the calling process is in a Cray PAGG job container.
 *
 * @param[out] flag  set to true if the process is in a PAGG,
 *                   otherwise false.  Must not be NULL.
 *
 * @return ORTE_SUCCESS once flag has been set, ORTE_ERR_BAD_PARAM
 *         when flag is NULL.
 */
ORTE_DECLSPEC int orte_common_alps_proc_in_pagg(bool *flag);
END_C_DECLS
#endif /* _COMMON_ALPS_H_ */

40
orte/mca/common/alps/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,40 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
# Copyright (c) 2014 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_orte_common_alps_CONFIG([action-if-can-compile],
# [action-if-cant-compile])
# ------------------------------------------------
# Registers orte/mca/common/alps/Makefile as a configure output file and
# invokes ORTE_CHECK_ALPS with the prefix common_alps.  When that check
# leaves common_alps_happy set to yes, action-if-can-compile is expanded;
# otherwise action-if-cant-compile is expanded.  The common_alps_CPPFLAGS,
# common_alps_LDFLAGS and common_alps_LIBS variables are declared as
# substitution variables.
AC_DEFUN([MCA_orte_common_alps_CONFIG],[
AC_CONFIG_FILES([orte/mca/common/alps/Makefile])
ORTE_CHECK_ALPS([common_alps], [common_alps_happy="yes"], [common_alps_happy="no"])
AS_IF([test "$common_alps_happy" = "yes"],
[$1
AC_SUBST([common_alps_CPPFLAGS])
AC_SUBST([common_alps_LDFLAGS])
AC_SUBST([common_alps_LIBS])],
[$2])
#
])dnl

51
orte/mca/ess/alps/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,51 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
ess_alps.h \
ess_alps_component.c \
ess_alps_module.c \
ess_alps_utils.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_orte_ess_alps_DSO
component_noinst =
component_install = mca_ess_alps.la
else
component_noinst = libmca_ess_alps.la
component_install =
endif
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_ess_alps_la_SOURCES = $(sources)
mca_ess_alps_la_CPPFLAGS = $(ess_alps_CPPFLAGS) -fno-ident
mca_ess_alps_la_LDFLAGS = -module -avoid-version $(ess_alps_LDFLAGS)
mca_ess_alps_la_LIBADD = $(ess_alps_LDFLAGS) \
$(ORTE_TOP_BUILDDIR)/orte/mca/common/alps/lib@ORTE_LIB_PREFIX@mca_common_alps.la
noinst_LTLIBRARIES = $(component_noinst)
libmca_ess_alps_la_SOURCES =$(sources)
libmca_ess_alps_la_CPPFLAGS = $(ess_alps_CPPFLAGS)
libmca_ess_alps_la_LDFLAGS = -module -avoid-version $(ess_alps_LDFLAGS)
libmca_ess_alps_la_LIBADD = $(ess_alps_LIBS)

37
orte/mca/ess/alps/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,37 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_orte_ess_alps_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Registers orte/mca/ess/alps/Makefile as a configure output file and
# invokes ORTE_CHECK_ALPS with the prefix ess_alps.  When that check leaves
# ess_alps_happy set to yes, action-if-found is expanded; otherwise
# action-if-not-found is expanded.  The ess_alps_CPPFLAGS, ess_alps_LDFLAGS
# and ess_alps_LIBS variables are declared as substitution variables.
AC_DEFUN([MCA_orte_ess_alps_CONFIG],[
AC_CONFIG_FILES([orte/mca/ess/alps/Makefile])
ORTE_CHECK_ALPS([ess_alps], [ess_alps_happy="yes"], [ess_alps_happy="no"])
AS_IF([test "$ess_alps_happy" = "yes"],
[$1
AC_SUBST([ess_alps_CPPFLAGS])
AC_SUBST([ess_alps_LDFLAGS])
AC_SUBST([ess_alps_LIBS])],
[$2])
])dnl

56
orte/mca/ess/alps/ess_alps.h Обычный файл
Просмотреть файл

@ -0,0 +1,56 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef ORTE_ESS_ALPS_H
#define ORTE_ESS_ALPS_H
#include "orte_config.h"
#include "opal/mca/mca.h"
#include "orte/mca/ess/ess.h"
#include "alps/alps.h"
#include "alps/alps_toolAssist.h"
#include "alps/libalpsutil.h"
#include "alps/libalpslli.h"
BEGIN_C_DECLS
/*
* Module open / close
*/
int orte_ess_alps_component_open(void);
int orte_ess_alps_component_close(void);
int orte_ess_alps_component_query(mca_base_module_t **module, int *priority);
/*
* alps component internal utility functions
*/
int orte_ess_alps_get_first_rank_on_node(int *first_rank);
int orte_ess_alps_sync_start(void);
int orte_ess_alps_sync_complete(void);
/*
 * ESS ALPS module and component
 */
extern orte_ess_base_module_t orte_ess_alps_module;
ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_alps_component;
END_C_DECLS
#endif /* ORTE_ESS_ALPS_H */

107
orte/mca/ess/alps/ess_alps_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,107 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/proc_info.h"
#include "orte/mca/common/alps/common_alps.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/base/base.h"
#include "orte/mca/ess/alps/ess_alps.h"
#include <sys/syscall.h>
/*
 * Instantiate the public component struct for the "alps" ESS component:
 * version information plus pointers to the open, close and query
 * functions defined in this file.
 */
orte_ess_base_component_t mca_ess_alps_component = {
/* First, the mca_component_t struct containing meta information
   about the component itself */
{
ORTE_ESS_BASE_VERSION_3_0_0,
/* Component name and version */
"alps",
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
/* Component open, close and query functions */
orte_ess_alps_component_open,
orte_ess_alps_component_close,
orte_ess_alps_component_query
},
{
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
}
};
/*
 * Component open: nothing to set up here, always reports success.
 */
int orte_ess_alps_component_open(void)
{
    return ORTE_SUCCESS;
}
/*
 * Component query: decide whether the alps ESS module may be used by the
 * calling process.
 *
 * module:   output; set to the alps ESS module when it is selectable,
 *           NULL otherwise.
 * priority: output; set to 35 when the module is selectable, 0 otherwise.
 *
 * Returns ORTE_ERROR for application processes; otherwise the status
 * reported by orte_common_alps_proc_in_pagg().
 */
int orte_ess_alps_component_query(mca_base_module_t **module, int *priority)
{
    int rc;
    bool flag = false;

    /*
     * Start from "not selectable" so that every return path leaves both
     * outputs defined, including the case where we are not in a PAGG
     * container or the container probe fails.
     */
    *priority = 0;
    *module = NULL;

    /*
     * don't use the alps ess component if an app proc
     */
    if (ORTE_PROC_IS_APP) {
        return ORTE_ERROR;
    }

    /*
     * make sure we're in a Cray PAGG container, and that we are also on
     * a compute node (i.e. we are thought of as an application task by
     * the cray job kernel module - the thing that creates the PAGG)
     */
    rc = orte_common_alps_proc_in_pagg(&flag);
    if ((ORTE_SUCCESS == rc) && flag) {
        *priority = 35; /* take precedence over base */
        *module = (mca_base_module_t *) &orte_ess_alps_module;
    }

    return rc;
}
/*
 * Component close: nothing to tear down here, always reports success.
 */
int orte_ess_alps_component_close(void)
{
    return ORTE_SUCCESS;
}

229
orte/mca/ess/alps/ess_alps_module.c Обычный файл
Просмотреть файл

@ -0,0 +1,229 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/util/show_help.h"
#include "opal/util/argv.h"
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/base/base.h"
#include "orte/util/name_fns.h"
#include "orte/util/nidmap.h"
#include "orte/util/regex.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/base/base.h"
#include "orte/mca/ess/alps/ess_alps.h"
#include <errno.h>
static int alps_set_name(void);
static int rte_init(void);
static int rte_finalize(void);
/*
 * Function table of the alps ESS module; the component query hands out a
 * pointer to this object.
 * NOTE(review): the slot order (init, finalize, abort, ft_event) is read
 * off the initializer names below -- confirm against the declaration of
 * orte_ess_base_module_t.
 */
orte_ess_base_module_t orte_ess_alps_module = {
rte_init,
rte_finalize,
orte_ess_base_app_abort,
NULL /* ft_event */
};
/* Local variables */
static orte_vpid_t starting_vpid = 0;
/*
 * Module init for daemon and tool processes.
 *
 * Runs the standard prolog, sets this process' name via alps_set_name(),
 * and then:
 *   - for a daemon: optionally corrects the vpid from the node regex,
 *     runs the base orted setup and synchronizes with aprun;
 *   - for a tool: runs the base tool setup.
 *
 * Returns ORTE_SUCCESS on success.  On failure the error code of the
 * failing step is returned and, unless errors are silenced, a help
 * message naming that step is shown.  The host list extracted from the
 * node regex is released on every exit path.
 */
static int rte_init(void)
{
    int ret, i;
    char *error = NULL;
    char **hosts = NULL;

    OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
                         "ess:alps in rte_init"));

    /*
     * shouldn't have been able to open this ess component if
     * process is app proc
     */
    if (ORTE_PROC_IS_APP) {
        error = "mpi rank invoking alps rte_init";
        ret = ORTE_ERR_NOT_SUPPORTED;
        goto fn_fail;
    }

    /* run the prolog */
    if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
        error = "orte_ess_base_std_prolog";
        goto fn_fail;
    }

    if (ORTE_SUCCESS != (ret = alps_set_name())) {
        error = "alps_set_name";
        goto fn_fail;
    }

    /*
     * if I am a daemon, complete my setup using the
     * default procedure
     */
    if (ORTE_PROC_IS_DAEMON) {
        if (NULL != orte_node_regex) {
            /* extract the nodes */
            if (ORTE_SUCCESS != (ret =
                orte_regex_extract_node_names(orte_node_regex, &hosts)) ||
                NULL == hosts) {
                error = "orte_regex_extract_node_names";
                goto fn_fail;
            }

            /* find our host in the list */
            for (i=0; NULL != hosts[i]; i++) {
                if (0 == strncmp(hosts[i], orte_process_info.nodename,
                                 strlen(hosts[i]))) {
                    /* correct our vpid - this is probably not necessary with aprun*/
                    ORTE_PROC_MY_NAME->vpid = starting_vpid + i;
                    OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
                                         "ess:alps reset name to %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                    break;
                }
            }
        }

        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_orted_setup";
            goto fn_fail;
        }

        /*
         * now synchronize with aprun.
         */
        if (ORTE_SUCCESS != (ret = orte_ess_alps_sync_start())) {
            error = "orte_ess_alps_sync";
            goto fn_fail;
        }

        ret = ORTE_SUCCESS;
        goto fn_exit;
    }

    if (ORTE_PROC_IS_TOOL) {
        /* otherwise, if I am a tool proc, use that procedure */
        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_tool_setup";
            goto fn_fail;
        }
        /* as a tool, I don't need a nidmap - so just return now */
        ret = ORTE_SUCCESS;
        goto fn_exit;
    }

fn_exit:
    /* single release point: covers the success path and every failure
       after the host list was extracted */
    if (NULL != hosts) {
        opal_argv_free(hosts);
        hosts = NULL;
    }
    return ret;

fn_fail:
    if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
        orte_show_help("help-orte-runtime.txt",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }
    goto fn_exit;
}
/*
 * Module finalize for daemon and tool processes.
 *
 * A daemon runs the base orted finalize and, only if that succeeded,
 * notifies alps that it is done.  A tool runs the base tool finalize.
 * Any other process type has nothing to do.  Failures are logged and
 * their error code is returned; otherwise ORTE_SUCCESS is returned.
 */
static int rte_finalize(void)
{
    int status;

    if (ORTE_PROC_IS_DAEMON) {
        /* finalize using the default daemon procedure */
        status = orte_ess_base_orted_finalize();
        if (ORTE_SUCCESS != status) {
            ORTE_ERROR_LOG(status);
            return status;
        }

        /* notify alps that we're done */
        status = orte_ess_alps_sync_complete();
        if (ORTE_SUCCESS != status) {
            ORTE_ERROR_LOG(status);
        }
        return status;
    }

    if (ORTE_PROC_IS_TOOL) {
        /* tool procs use the tool procedure */
        status = orte_ess_base_tool_finalize();
        if (ORTE_SUCCESS != status) {
            ORTE_ERROR_LOG(status);
        }
        return status;
    }

    return ORTE_SUCCESS;
}
static int alps_set_name(void)
{
int rc;
int rank;
orte_jobid_t jobid;
if (NULL == orte_ess_base_jobid) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (NULL == orte_ess_base_vpid) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&starting_vpid,
orte_ess_base_vpid))) {
ORTE_ERROR_LOG(rc);
return(rc);
}
ORTE_PROC_MY_NAME->jobid = jobid;
if (ORTE_SUCCESS != (rc = orte_ess_alps_get_first_rank_on_node(&rank))) {
ORTE_ERROR_LOG(rc);
return(rc);
}
ORTE_PROC_MY_NAME->vpid = (orte_vpid_t)rank + starting_vpid;
/* get the num procs as provided in the cmd line param */
if (ORTE_SUCCESS != (rc = orte_ess_env_get())) {
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
}

241
orte/mca/ess/alps/ess_alps_utils.c Обычный файл
Просмотреть файл

@ -0,0 +1,241 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/util/show_help.h"
#include "opal/util/argv.h"
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/base/base.h"
#include "orte/util/name_fns.h"
#include "orte/util/nidmap.h"
#include "orte/util/regex.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/base/base.h"
#include "orte/mca/ess/alps/ess_alps.h"
/*
 * Use the ALPS placement information to obtain the global rank of the
 * "first" local rank on the node.
 *
 * first_rank: output; receives the firstPe field of the placement layout
 *             on success.  Must not be NULL.
 *
 * Returns ORTE_SUCCESS on success, ORTE_ERR_BAD_PARAM when first_rank is
 * NULL, ORTE_ERR_FILE_WRITE_FAILURE or ORTE_ERR_FILE_READ_FAILURE when a
 * step of the LLI exchange fails, and ORTE_ERROR when the placement query
 * fails.  Once the LLI lock has been taken it is released on every path.
 */
int
orte_ess_alps_get_first_rank_on_node(int *first_rank)
{
    int alps_status = 0;
    uint64_t apid;
    size_t alps_count;
    int ret = ORTE_SUCCESS;
    int lli_ret = 0, place_ret;
    alpsAppLayout_t orted_layout;

    if (first_rank == NULL) {
        ret = ORTE_ERR_BAD_PARAM;
        goto fn_exit;
    }

    /*
     * First get our apid
     */

    /* test the value the lock call returned, not the (still successful)
       overall status */
    lli_ret = alps_app_lli_lock();
    if (0 != lli_ret) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_app_lli_lock returned %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
        ret = ORTE_ERR_FILE_WRITE_FAILURE;
        goto fn_exit;
    }

    lli_ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_APID, NULL, 0);
    if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_app_lli_put_request - APID returned %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
        ret = ORTE_ERR_FILE_WRITE_FAILURE;
        goto fn_exit_w_lock;
    }

    /* NOTE(review): only the reported alps_status is examined here, not
       the return value of the call itself -- confirm that is sufficient */
    lli_ret = alps_app_lli_get_response (&alps_status, &alps_count);
    if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_app_lli_get_response returned %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
        ret = ORTE_ERR_FILE_READ_FAILURE;
        goto fn_exit_w_lock;
    }

    lli_ret = alps_app_lli_get_response_bytes (&apid, sizeof(apid));
    if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_app_lli_get_response_bytes returned %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
        ret = ORTE_ERR_FILE_READ_FAILURE;
        goto fn_exit_w_lock;
    }

    place_ret = alps_get_placement_info(apid,
                                        &orted_layout,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL);
    if (1 != place_ret) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_get_placement_info returned %d (%s)",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), place_ret, strerror(errno)));
        ret = ORTE_ERROR;
        /* the LLI lock is still held at this point, so leave through the
           unlocking exit */
        goto fn_exit_w_lock;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output,
                         "%s ess:alps: alps_get_placement_info returned %d first pe on node is %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), place_ret, orted_layout.firstPe));
    *first_rank = orted_layout.firstPe;

fn_exit_w_lock:
    lli_ret = alps_app_lli_unlock();
    if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_app_lli_unlock returned %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
        ret = ORTE_ERR_FILE_WRITE_FAILURE;
    }

fn_exit:
    return ret;
}
/*
 * Function to check in with apshepherd to say we are a parallel application.
 *
 * Takes the LLI lock, sends the START request, reads the response status
 * and releases the lock again.
 *
 * Returns ORTE_SUCCESS on success, ORTE_ERR_FILE_WRITE_FAILURE when
 * locking, sending the request or unlocking fails, and
 * ORTE_ERR_FILE_READ_FAILURE when the response status is not OK.
 */
int
orte_ess_alps_sync_start(void)
{
    int ret = ORTE_SUCCESS;
    int lli_ret = 0;
    int alps_status = 0;
    size_t alps_count;

    /* test the value the lock call returned, not the (still successful)
       overall status */
    lli_ret = alps_app_lli_lock();
    if (0 != lli_ret) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_app_lli_lock returned %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
        ret = ORTE_ERR_FILE_WRITE_FAILURE;
        goto fn_exit;
    }

    lli_ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_START, NULL, 0);
    if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_app_lli_put_request returned %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
        ret = ORTE_ERR_FILE_WRITE_FAILURE;
        goto fn_exit_w_lock;
    }

    /* NOTE(review): only the reported alps_status is examined here, not
       the return value of the call itself -- confirm that is sufficient */
    lli_ret = alps_app_lli_get_response (&alps_status, &alps_count);
    if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_app_lli_get_response returned %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
        ret = ORTE_ERR_FILE_READ_FAILURE;
        goto fn_exit_w_lock;
    }

fn_exit_w_lock:
    lli_ret = alps_app_lli_unlock();
    if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_app_lli_unlock returned %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
        ret = ORTE_ERR_FILE_WRITE_FAILURE;
    }

fn_exit:
    return ret;
}
/*
 * Function to send the ALPS_APP_LLI_ALPS_REQ_EXITING request over the ALPS
 * LLI channel (going by the request name, this is the exit-side counterpart
 * of orte_ess_alps_sync_start).
 *
 * Sequence: take the ALPS LLI lock, send the request with no payload, read
 * the response status, release the lock.
 *
 * Returns:
 *   ORTE_SUCCESS                 every step succeeded
 *   ORTE_ERR_FILE_WRITE_FAILURE  the lock, the request, or the unlock failed
 *   ORTE_ERR_FILE_READ_FAILURE   the response could not be read, or it
 *                                carried a status other than
 *                                ALPS_APP_LLI_ALPS_STAT_OK
 *
 * When several steps fail, the first error encountered is the one returned.
 */
int
orte_ess_alps_sync_complete(void)
{
    int ret = ORTE_SUCCESS;
    int lli_ret = 0;
    int alps_status = 0;
    size_t alps_count;   /* required out-parameter of get_response; value unused */

    lli_ret = alps_app_lli_lock();
    /* Check the lock call's own result: 'ret' is still ORTE_SUCCESS here,
     * so testing it would never detect a failed lock. */
    if (0 != lli_ret) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_app_lli_lock returned %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
        ret = ORTE_ERR_FILE_WRITE_FAILURE;
        goto fn_exit;
    }

    lli_ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_EXITING, NULL, 0);
    if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_app_lli_put_request returned %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
        ret = ORTE_ERR_FILE_WRITE_FAILURE;
        goto fn_exit_w_lock;
    }

    lli_ret = alps_app_lli_get_response (&alps_status, &alps_count);
    /* A failed read leaves alps_status at its initial 0, so the call's
     * return value has to be checked before the status can be trusted. */
    if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_app_lli_get_response returned %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
        ret = ORTE_ERR_FILE_READ_FAILURE;
        goto fn_exit_w_lock;
    }
    if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_app_lli_get_response returned %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status));
        ret = ORTE_ERR_FILE_READ_FAILURE;
    }

fn_exit_w_lock:
    lli_ret = alps_app_lli_unlock();
    if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) {
        OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output,
                             "%s ess:alps: alps_app_lli_unlock returned %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret));
        /* keep an earlier, more specific error if there is one */
        if (ORTE_SUCCESS == ret) {
            ret = ORTE_ERR_FILE_WRITE_FAILURE;
        }
    }

fn_exit:
    return ret;
}

Просмотреть файл

@ -44,7 +44,8 @@ mcacomponent_LTLIBRARIES = $(component_install)
mca_odls_alps_la_SOURCES = $(sources)
mca_odls_alps_la_CPPFLAGS = $(odls_alps_CPPFLAGS)
mca_odls_alps_la_LDFLAGS = -module -avoid-version $(odls_alps_LDFLAGS)
mca_odls_alps_la_LIBADD = $(odls_alps_LIBS)
mca_odls_alps_la_LIBADD = $(odls_alps_LIBS) \
$(ORTE_TOP_BUILDDIR)/orte/mca/common/alps/lib@ORTE_LIB_PREFIX@mca_common_alps.la
noinst_LTLIBRARIES = $(component_noinst)
libmca_odls_alps_la_SOURCES =$(sources)

Просмотреть файл

@ -25,29 +25,13 @@
AC_DEFUN([MCA_orte_odls_alps_CONFIG],[
AC_CONFIG_FILES([orte/mca/odls/alps/Makefile])
odls_alps_lli_happy="no"
odls_alps_util_happy="no"
ORTE_CHECK_ALPS([odls_alps], [odls_alps_happy="yes"], [odls_alps_happy="no"])
PKG_CHECK_MODULES([CRAY_ALPS_LLI], [cray-alpslli],
[odls_alps_CPPFLAGS=$CRAY_ALPS_LLI_CFLAGS
odls_alps_LDFLAGS=$CRAY_ALPS_LLI_LIBS
odls_alps_LIBS=$CRAY_ALPS_LLI_LIBS
odls_alps_lli_happy="yes"],
[AC_MSG_RESULT([no])])
PKG_CHECK_MODULES([CRAY_ALPS_UTIL], [cray-alpsutil],
[odls_alps_CPPFLAGS="$odls_alps_CPPFLAGS $CRAY_ALPS_UTIL_CFLAGS"
odls_alps_LDFLAGS="$odls_alps_LDFLAGS $CRAY_ALPS_UTIL_LIBS"
odls_alps_LIBS="$odls_alps_LIBS $CRAY_ALPS_LLI_LIBS"
odls_alps_util_happy="yes"],
[AC_MSG_RESULT([no])])
AS_IF([test "$odls_alps_lli_happy" = "yes" -a "$odls_alps_util_happy" = "yes"],
AS_IF([test "$odls_alps_happy" = "yes"],
[$1
AC_SUBST([odls_alps_CPPFLAGS])
AC_SUBST([odls_alps_LDFLAGS])
AC_SUBST([odls_alps_LIBS])],
[$2])
])dnl

Просмотреть файл

@ -37,6 +37,7 @@
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/mca/common/alps/common_alps.h"
#include "orte/mca/odls/odls.h"
#include "orte/mca/odls/base/odls_private.h"
#include "orte/mca/odls/alps/odls_alps.h"
@ -77,9 +78,7 @@ int orte_odls_alps_component_open(void)
int orte_odls_alps_component_query(mca_base_module_t **module, int *priority)
{
int rc = ORTE_SUCCESS;
const char proc_job_file[]="/proc/job";
FILE *fd = NULL, *fd_task_is_app = NULL;
char task_is_app_fname[PATH_MAX];
bool flag;
/*
* make sure we're in a daemon process
@ -97,26 +96,10 @@ int orte_odls_alps_component_query(mca_base_module_t **module, int *priority)
* the cray job kernel module - the thing that creates the PAGG
*/
/* disqualify ourselves if not running in a Cray PAGG container */
fd = fopen(proc_job_file, "r");
if (fd == NULL) {
*priority = 0;
*module = NULL;
rc = ORTE_ERROR;
} else {
snprintf(task_is_app_fname,sizeof(task_is_app_fname),
"/proc/self/task/%ld/task_is_app",syscall(SYS_gettid));
fd_task_is_app = fopen(task_is_app_fname, "r");
if (fd_task_is_app != NULL) { /* okay we're in a PAGG container,
and we are an app task (not just a process
running on a mom node, for example),
so we should give cray pmi a shot. */
*priority = 10; /* take precendence over base */
*module = (mca_base_module_t *) &orte_odls_alps_module;
fclose(fd_task_is_app);
rc = orte_odls_alps_get_rdma_creds();
}
fclose(fd);
rc = orte_common_alps_proc_in_pagg(&flag);
if ((ORTE_SUCCESS == rc) && flag) {
*priority = 10; /* take precendence over base */
*module = (mca_base_module_t *) &orte_odls_alps_module;
}
return rc;

Просмотреть файл

@ -740,6 +740,15 @@ int orte_odls_alps_launch_local_procs(opal_buffer_t *data)
return rc;
}
/* get the RDMA credentials and push them into the launch environment */
if (ORTE_SUCCESS != (rc = orte_odls_alps_get_rdma_creds())) {;
OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output,
"%s odls:alps:launch:failed to get GNI rdma credentials %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc)));
return rc;
}
/* launch the local procs */
ORTE_ACTIVATE_LOCAL_LAUNCH(job, odls_alps_fork_local_proc);

Просмотреть файл

@ -231,12 +231,6 @@ int orte_odls_alps_get_rdma_creds(void)
goto fn_exit;
}
#if 0
fprintf(stderr,"apid = 0x%lx ptag0 %d cookie0 0x%x(%d) ptag1 %d cookie1 0x%x(%d)\n",apid,
rdmacred_buf[0].ptag,rdmacred_buf[0].cookie,rdmacred_buf[0].cookie,
rdmacred_buf[1].ptag,rdmacred_buf[1].cookie,rdmacred_buf[1].cookie);
#endif
}
fn_exit: