diff --git a/VERSION b/VERSION
index 7a245a4051..88ba107f5f 100644
--- a/VERSION
+++ b/VERSION
@@ -107,3 +107,6 @@ libmca_common_verbs_so_version=0:0:0
 
 # OPAL layer
 libmca_opal_common_pmi_so_version=0:0:0
+
+# ORTE layer
+libmca_common_alps_so_version=0:0:0
diff --git a/config/orte_check_alps.m4 b/config/orte_check_alps.m4
index 5aed79385b..8160c95c26 100644
--- a/config/orte_check_alps.m4
+++ b/config/orte_check_alps.m4
@@ -17,112 +17,155 @@
 #
 # $HEADER$
 #
+# ORTE_CHECK_ALPS_CLE4([action-if-found], [action-if-not-found])
+# --------------------------------------------------------
+AC_DEFUN([ORTE_CHECK_ALPS_CLE4],[
+
+#
+# if we've gotten here, it's because we are building on a CLE 4 system
+#
+    orte_check_alps_libdir_cle4_happy="no"
+    orte_check_alps_dir_cle4_happy="no"
+
+    AC_MSG_CHECKING([Checking for ALPS components on a CLE 4 system with alps $with_alps])
+
+    AC_ARG_WITH([alps-libdir],
+                [AC_HELP_STRING([--with-alps-libdir=DIR],
+                [Location of alps libraries (alpslli, alpsutil) (default: /usr/lib/alps (/opt/cray/xe-sysroot/default/usr/lib/alps on eslogin nodes))])])
+
+#
+# check to see if Open MPI is being built on a CLE 4 eslogin node
+#
+    AS_IF([test -f /etc/opt/cray/release/ESLrelease],
+          [default_alps_dir="/opt/cray/xe-sysroot/default/usr"],
+          [default_alps_dir="/usr"])
+
+#
+# an explicit --with-alps-libdir wins; otherwise derive the library
+# directory from --with-alps=DIR, falling back to the platform default
+#
+    AS_IF([test -z "$with_alps_libdir"],
+        [AS_IF([test "$with_alps" != "yes" -a "$with_alps" != "auto"],
+               [AS_IF([test -d "$with_alps/lib64"],
+                   [orte_check_alps_libdir="$with_alps/lib64"],
+                   [orte_check_alps_libdir="$with_alps/lib"])],
+               [orte_check_alps_libdir="$default_alps_dir/lib/alps"])
+        ],[orte_check_alps_libdir="$with_alps_libdir"])
+
+    AS_IF([test "$with_alps" = "yes" -o "$with_alps" = "auto"],
+          [orte_check_alps_dir=$default_alps_dir],
+          [orte_check_alps_dir=$with_alps])
+
+    AC_MSG_CHECKING([if $orte_check_alps_libdir/libalps.a is present])
+    AS_IF([test -f "$orte_check_alps_libdir/libalps.a"],
+          [orte_check_alps_libdir_cle4_happy="yes"],
+          [orte_check_alps_libdir_cle4_happy="no"])
+    AC_MSG_RESULT([$orte_check_alps_libdir_cle4_happy])
+
+    AC_MSG_CHECKING([if $orte_check_alps_dir/include/alps/apInfo.h is present])
+    AS_IF([test -f "$orte_check_alps_dir/include/alps/apInfo.h"],
+          [orte_check_alps_dir_cle4_happy="yes"],
+          [orte_check_alps_dir_cle4_happy="no"])
+    AC_MSG_RESULT([$orte_check_alps_dir_cle4_happy])
+
+    AS_IF([test "$orte_check_alps_libdir_cle4_happy" = "yes" -a "$orte_check_alps_dir_cle4_happy" = "yes"],
+          [CRAY_ALPSLLI_CFLAGS="-I$orte_check_alps_dir/include"
+           CRAY_ALPSLLI_LIBS="-L$orte_check_alps_libdir -lalpslli -lalpsutil"
+           CRAY_ALPSLLI_STATIC_LIBS="-L$orte_check_alps_libdir -lalpslli -lalpsutil"
+           $1],
+          [$2])
+])
+
+
+
 # ORTE_CHECK_ALPS(prefix, [action-if-found], [action-if-not-found])
 # --------------------------------------------------------
 AC_DEFUN([ORTE_CHECK_ALPS],[
     if test -z "$orte_check_alps_happy"; then
-        # require that we check for pmi support request first so
-        # we can get the static library ordering correct
-        AC_REQUIRE([OPAL_CHECK_CRAY_PMI])
 
         AC_ARG_WITH([alps],
                     [AC_HELP_STRING([--with-alps(=DIR|yes|no)],
-                    [Build with ALPS scheduler component, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries (default: no)])])
-        OPAL_CHECK_WITHDIR([alps], [$with_alps], [.])
+                    [Build with ALPS scheduler component, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries (default: auto)])],[],with_alps=auto)
 
-        AC_ARG_WITH([alps-libdir],
-                    [AC_HELP_STRING([--with-alps-libdir=DIR],
-                    [Location of alps libraries (alpslli, alpsutil) (default: /usr/lib/alps)])])
-
-        AC_ARG_WITH([wlm_detect],
-                    [AC_HELP_STRING([--with-wlm_detect(=DIR)],
-                    [Location of wlm_detect library needed by PMI on CLE 5 systems (default: /opt/cray/wlm_detect/default)])])
-
-        # save the CPPFLAGS so we can check for alps/apInfo.h without adding $with_alps/include to the global path
-        orte_check_alps_$1_save_CPPFLAGS="$CPPFLAGS"
-
-        #
-        # check to see where alps is installed, it wandered to a new location starting with CLE 5.0
-        #
-
-        if test -f "/usr/lib/alps/libalps.a" ; then
-            using_cle5_install="no"
+        if test -f /etc/opt/cray/release/clerelease; then
+            cle_level=`awk -F. '{print [$]1}' /etc/opt/cray/release/clerelease`
         else
-            using_cle5_install="yes"
-            if test -z "$with_wlm_detect" ; then
-                with_wlm_detect="/opt/cray/wlm_detect/default"
-            fi
-
-            # libpmi requires libugni for static linking on CLE 5. WTH!
-            OPAL_CHECK_UGNI($1,[orte_check_alps_happy=yes],[orte_check_alps_happy=no])
+            cle_level="unknown"
         fi
 
-        if test "$with_alps" = "no" -o -z "$with_alps" ; then
-            orte_check_alps_happy="no"
-        else
-            # Only need to do these tests once - this macro is invoked
-            # from multiple different components' configure.m4 scripts
+        AC_MSG_CHECKING([for ALPS support cle level $cle_level])
+        AS_IF([test "$cle_level" = "4" -a "$with_alps" != "no"],
+            [ORTE_CHECK_ALPS_CLE4([orte_check_cray_alps_happy="yes"],
+                                  [orte_check_cray_alps_happy="no"])],
+            [AS_IF([test "$with_alps" = "no"],
+                   [AC_MSG_RESULT([no])
+                   $3],
+                   [AS_IF([test "$with_alps" = "auto" -o "$with_alps" = "yes"],
+                          [# all three modules are required - start happy and let any miss veto
+                           orte_check_cray_alps_happy="yes"
+                           PKG_CHECK_MODULES_STATIC([CRAY_ALPSLLI], [cray-alpslli],
+                                             [],
+                                             [orte_check_cray_alps_happy="no"]
+                                             [AS_IF([test "$with_alps" = "yes"],
+                                                    [AC_MSG_WARN([ALPS support requested but pkg-config failed.])
+                                                     AC_MSG_WARN([Need to explicitly indicate ALPS directory])
+                                                     AC_MSG_WARN([on the configure line using --with-alps option.])
+                                                     AC_MSG_ERROR([Aborting])],[])]
+                                             )
+                           PKG_CHECK_MODULES_STATIC([CRAY_ALPSUTIL], [cray-alpsutil],
+                                             [],
+                                             [orte_check_cray_alps_happy="no"]
+                                             [AS_IF([test "$with_alps" = "yes"],
+                                                    [AC_MSG_WARN([ALPS support requested but pkg-config failed.])
+                                                     AC_MSG_WARN([Need to explicitly indicate ALPS directory])
+                                                     AC_MSG_WARN([on the configure line using --with-alps option.])
+                                                     AC_MSG_ERROR([Aborting])],[])]
+                                             )
 
-            orte_check_alps_happy="yes"
-            orte_check_alps_libdir="$with_alps_libdir"
-
-            if test -z "$orte_check_alps_libdir" ; then
-                if test "$with_alps" != "yes" ; then
-                    AS_IF([test -d "$with_alps/lib64"],
-                          [orte_check_alps_libdir="$with_alps/lib64"],
-                          [orte_check_alps_libdir="$with_alps/lib"])
-                else
-                    if test "$using_cle5_install" = "yes"; then
-                        orte_check_alps_libdir="/opt/cray/alps/default/lib64"
-                    else
-                        orte_check_alps_libdir="/usr/lib/alps"
-                    fi
-                fi
-            fi
+                           PKG_CHECK_MODULES_STATIC([CRAY_ALPS], [cray-alps],
+                                             [],
+                                             [orte_check_cray_alps_happy="no"]
+                                             [AS_IF([test "$with_alps" = "yes"],
+                                                    [AC_MSG_WARN([ALPS support requested but pkg-config failed.])
+                                                     AC_MSG_WARN([Need to explicitly indicate ALPS directory])
+                                                     AC_MSG_WARN([on the configure line using --with-alps option.])
+                                                     AC_MSG_ERROR([Aborting])],[])]
+                                             )
 
-            if test "$with_alps" = "yes" ; then
-                AS_IF([test "$using_cle5_install" = "yes"],
-                      [orte_check_alps_dir="/opt/cray/alps/default"],
-                      [orte_check_alps_dir="/usr"])
-            else
-                orte_check_alps_dir="$with_alps"
-            fi
+                          ],
+                          [AC_MSG_WARN([See ./configure --help for how to control Open MPI])
+                           AC_MSG_WARN([configuration for ALPS on CLE 5 and higher systems])
+                           AC_MSG_ERROR([Aborting])])
+                   ])
+            ])
 
-            if test -z "$orte_check_alps_pmi_happy"; then
-                # if pmi support is requested, then OPAL_CHECK_PMI
-                # will have added the -lpmi flag to LIBS. We then need
-                # to add a couple of alps libs to support static
-                # builds
-                if test "$opal_enable_pmi" = 1 ; then
-                    AC_MSG_CHECKING([for alps libraries in "$orte_check_alps_libdir"])
+        AC_MSG_RESULT([orte_check_cray_alps_happy = $orte_check_cray_alps_happy])
+
+        # static builds link against the static flavor of the library flags
+        AS_IF([test "$orte_check_cray_alps_happy" = "yes" -a "$enable_static" = "yes"],
+              [CRAY_ALPSLLI_LIBS="$CRAY_ALPSLLI_STATIC_LIBS"
+               CRAY_ALPSUTIL_LIBS="$CRAY_ALPSUTIL_STATIC_LIBS"],
+              [])
+
+        AC_MSG_RESULT([CRAY_ALPSLLI_STATIC_LIBS - $CRAY_ALPSLLI_STATIC_LIBS])
+        AC_MSG_RESULT([CRAY_ALPSLLI_LIBS - $CRAY_ALPSLLI_LIBS])
+        AC_MSG_RESULT([CRAY_ALPSLLI_CFLAGS - $CRAY_ALPSLLI_CFLAGS])
+
+        AC_MSG_RESULT([CRAY_ALPSUTIL_STATIC_LIBS - $CRAY_ALPSUTIL_STATIC_LIBS])
+        AC_MSG_RESULT([CRAY_ALPSUTIL_LIBS - $CRAY_ALPSUTIL_LIBS])
+        AC_MSG_RESULT([CRAY_ALPSUTIL_CFLAGS - $CRAY_ALPSUTIL_CFLAGS])
+
+        AC_MSG_RESULT([CRAY_ALPS_CFLAGS - $CRAY_ALPS_CFLAGS])
+
+        AS_IF([test "$orte_check_cray_alps_happy" = "yes"],
+              [$1_LDFLAGS="$CRAY_ALPSLLI_LIBS $CRAY_ALPSUTIL_LIBS"
+               $1_CPPFLAGS="$CRAY_ALPSLLI_CFLAGS $CRAY_ALPSUTIL_CFLAGS $CRAY_ALPS_CFLAGS"
+               $1_LIBS="$CRAY_ALPSLLI_LIBS $CRAY_ALPSUTIL_LIBS"],
+              [])
 
-                    # libalpslli and libalpsutil are needed by libpmi to compile statically
-                    AS_IF([test -f "$orte_check_alps_libdir/libalpslli.a" -a -f "$orte_check_alps_libdir/libalpsutil.a"],
-                          [AC_MSG_RESULT([found])
-                          orte_check_alps_pmi_happy=yes],
-                          [AC_MSG_WARN([PMI support for Alps requested but not found])
-                          AC_MSG_ERROR([Cannot continue])])
-                fi
-            fi
-        fi
     fi
 
-    # Set LIBS, CPPFLAGS, and LDFLAGS here so they always get set
-    if test "$orte_check_alps_happy" = "yes" -a "$opal_enable_pmi" = 1 ; then
-        $1_LIBS="-lalpslli -lalpsutil"
-    fi
-
-    $1_CPPFLAGS="-I$orte_check_alps_dir/include"
-    $1_LDFLAGS="-L$orte_check_alps_libdir"
-
-    # Add CLE 5 library dependencies
-    if test "using_cle5_install" = "yes" ; then
-        $1_LIBS="$$1_LIBS -lwlm_detect"
-        $1_LDFLAGS="$$1_LDFLAGS -L$with_wlm_detect"
-    fi
-
-    AS_IF([test "$orte_check_alps_happy" = "yes"],
-          [$2],
-          [$3])
+    AS_IF([test "$orte_check_cray_alps_happy" = "yes"],
+          [$2], [$3])
 ])
diff --git a/configure.ac b/configure.ac
index 8212fcbf42..87095309fa 100644
--- a/configure.ac
+++ b/configure.ac
@@ -157,6 +157,7 @@ AC_SUBST(libmca_common_ofacm_so_version)
 AC_SUBST(libmca_common_sm_so_version)
 AC_SUBST(libmca_common_ugni_so_version)
 AC_SUBST(libmca_common_verbs_so_version)
+AC_SUBST(libmca_common_alps_so_version)
 
 #
 # Get the versions of the autotools that were used to bootstrap us
diff --git a/opal/mca/pmix/cray/pmix_cray_component.c b/opal/mca/pmix/cray/pmix_cray_component.c
index 8027d54f96..21a9fd658a 100644
--- a/opal/mca/pmix/cray/pmix_cray_component.c
+++ b/opal/mca/pmix/cray/pmix_cray_component.c
@@ -105,10 +105,6 @@ static int pmix_cray_component_query(mca_base_module_t **module, int *priority)
 
 static int pmix_cray_component_close(void)
 {
-    int ret = OPAL_SUCCESS;
-
-    ret = opal_pmix_cray_module.finalize();
-
-    return ret;
+    return OPAL_SUCCESS;
 }
 
diff --git a/orte/mca/common/Makefile.am b/orte/mca/common/Makefile.am
new file mode 100644
index 0000000000..33bbb5f2a3
--- /dev/null
+++ b/orte/mca/common/Makefile.am
@@ -0,0 +1,25 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+# University Research and Technology
+# Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+# of Tennessee Research Foundation. All rights
+# reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+# University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+# All rights reserved.
+# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# Note that this file must exist, even though it is empty (there is no
+# "base" directory for the common framework). autogen.pl and
+# opal_mca.m4 assume that every framework has a top-level Makefile.am.
+# We *could* adjust the framework glue code to exclude "common" from
+# this requirement, but it's just a lot easier to have an empty
+# Makefile.am here.
diff --git a/orte/mca/common/alps/Makefile.am b/orte/mca/common/alps/Makefile.am
new file mode 100644
index 0000000000..f7e8be173d
--- /dev/null
+++ b/orte/mca/common/alps/Makefile.am
@@ -0,0 +1,66 @@
+#
+# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
+# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
+# Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+AM_CPPFLAGS = $(common_alps_CPPFLAGS)
+
+headers = \
+        common_alps.h
+
+sources = \
+        common_alps.c
+
+
+lib_LTLIBRARIES =
+noinst_LTLIBRARIES =
+comp_inst = lib@ORTE_LIB_PREFIX@mca_common_alps.la
+comp_noinst = lib@ORTE_LIB_PREFIX@mca_common_alps_noinst.la
+
+if MCA_BUILD_orte_common_alps_DSO
+lib_LTLIBRARIES += $(comp_inst)
+else
+noinst_LTLIBRARIES += $(comp_noinst)
+endif
+
+lib@ORTE_LIB_PREFIX@mca_common_alps_la_SOURCES = $(headers) $(sources)
+lib@ORTE_LIB_PREFIX@mca_common_alps_la_CPPFLAGS = $(common_alps_CPPFLAGS)
+lib@ORTE_LIB_PREFIX@mca_common_alps_la_LDFLAGS = \
+        -version-info $(libmca_common_alps_so_version) \
+        $(common_alps_LDFLAGS)
+lib@ORTE_LIB_PREFIX@mca_common_alps_la_LIBADD = $(common_alps_LIBS)
+lib@ORTE_LIB_PREFIX@mca_common_alps_noinst_la_SOURCES = $(headers) $(sources)
+
+# Conditionally install the header files
+
+if WANT_INSTALL_HEADERS
+opaldir = $(opalincludedir)/opal/mca/common/common_alps.h
+opal_HEADERS = $(headers)
+else
+opaldir = $(includedir)
+endif
+
+# These two rules will sym link the "noinst" libtool library filename
+# to the installable libtool library filename in the case where we are
+# compiling this component statically (case 2), described above).
+V=0
+OMPI_V_LN_SCOMP = $(ompi__v_LN_SCOMP_$V)
+# AM_DEFAULT_VERBOSITY needs the parentheses: make reads a bare
+# multi-letter reference as the one-letter variable A plus literal text.
+ompi__v_LN_SCOMP_ = $(ompi__v_LN_SCOMP_$(AM_DEFAULT_VERBOSITY))
+ompi__v_LN_SCOMP_0 = @echo " LN_S " `basename $(comp_inst)`;
+
+all-local:
+	$(OMPI_V_LN_SCOMP) if test -z "$(lib_LTLIBRARIES)"; then \
+	  rm -f "$(comp_inst)"; \
+	  $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \
+	fi
+
+clean-local:
+	if test -z "$(lib_LTLIBRARIES)"; then \
+	  rm -f "$(comp_inst)"; \
+	fi
diff --git a/orte/mca/common/alps/common_alps.c b/orte/mca/common/alps/common_alps.c
new file mode 100644
index 0000000000..6fd77ad054
--- /dev/null
+++ b/orte/mca/common/alps/common_alps.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2014 Los Alamos National Security, LLC. All rights
+ * reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * These symbols are in a file by themselves to provide nice linker
+ * semantics. Since linkers generally pull in symbols by object
+ * files, keeping these symbols as the only symbols in this file
+ * prevents utility programs such as "ompi_info" from having to import
+ * entire components just to query their version and parameters.
+ */
+
+#include "opal/types.h"
+
+#include "orte_config.h"
+#include "orte/constants.h"
+#include "orte/mca/common/alps/common_alps.h"
+
+#include <limits.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+
+/*
+ * determine whether or not calling process is in a Cray PAGG container
+ *
+ * *flag is set to true only when /proc/job can be opened and the
+ * calling thread has an openable /proc/self/task/<tid>/task_is_app
+ * entry; in every other case it is set to false.  Returns
+ * ORTE_ERR_BAD_PARAM when flag is NULL, ORTE_SUCCESS otherwise.
+ */
+
+int orte_common_alps_proc_in_pagg(bool *flag)
+{
+    int rc = ORTE_SUCCESS;
+    const char proc_job_file[]="/proc/job";
+    FILE *fd = NULL, *fd_task_is_app = NULL;
+    char task_is_app_fname[PATH_MAX];
+
+    if (flag == NULL) {
+        return ORTE_ERR_BAD_PARAM;
+    }
+
+    /* assume "not in a PAGG" until both probes below succeed */
+    *flag = false;
+
+    fd = fopen(proc_job_file, "r");
+    if (fd == NULL) {
+        return rc;
+    }
+
+    snprintf(task_is_app_fname,sizeof(task_is_app_fname),
+             "/proc/self/task/%ld/task_is_app",syscall(SYS_gettid));
+    fd_task_is_app = fopen(task_is_app_fname, "r");
+    if (fd_task_is_app != NULL) {   /* okay we're in a PAGG container,
+                                       and we are an app task (not just a process
+                                       running on a mom node, for example), */
+        *flag = true;
+        fclose(fd_task_is_app);
+    }
+    fclose(fd);
+
+    return rc;
+}
+
diff --git a/orte/mca/common/alps/common_alps.h b/orte/mca/common/alps/common_alps.h
new file mode 100644
index 0000000000..21333f733d
--- /dev/null
+++ b/orte/mca/common/alps/common_alps.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
+ * All rights reserved.
+ * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
+ * Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2014 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef _COMMON_ALPS_H_
+#define _COMMON_ALPS_H_
+
+#include "opal_config.h"
+
+BEGIN_C_DECLS
+
+/**
+ * Determine if calling process is in a Cray PAGG job container.
+ *
+ * @param[out] flag  set to TRUE if the process is in a PAGG, otherwise
+ *                   FALSE; must not be NULL
+ *
+ * @retval ORTE_SUCCESS        flag was written
+ * @retval ORTE_ERR_BAD_PARAM  flag is NULL
+ */
+OPAL_DECLSPEC int orte_common_alps_proc_in_pagg(bool *flag);
+
+END_C_DECLS
+
+#endif /* _COMMON_ALPS_H_ */
+
diff --git a/orte/mca/common/alps/configure.m4 b/orte/mca/common/alps/configure.m4
new file mode 100644
index 0000000000..519dd23b0d
--- /dev/null
+++ b/orte/mca/common/alps/configure.m4
@@ -0,0 +1,40 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+# University Research and Technology
+# Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+# of Tennessee Research Foundation. All rights
+# reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+# University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+# All rights reserved.
+# Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
+# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
+# Copyright (c) 2014 Los Alamos National Security, LLC.
+# All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# MCA_orte_common_alps_CONFIG([action-if-can-compile],
+#                             [action-if-cant-compile])
+# ------------------------------------------------
+AC_DEFUN([MCA_orte_common_alps_CONFIG],[
+    AC_CONFIG_FILES([orte/mca/common/alps/Makefile])
+
+    ORTE_CHECK_ALPS([common_alps], [common_alps_happy="yes"], [common_alps_happy="no"])
+
+    AS_IF([test "$common_alps_happy" = "yes"],
+          [$1
+           AC_SUBST([common_alps_CPPFLAGS])
+           AC_SUBST([common_alps_LDFLAGS])
+           AC_SUBST([common_alps_LIBS])],
+          [$2])
+    #
+])dnl
diff --git a/orte/mca/ess/alps/Makefile.am b/orte/mca/ess/alps/Makefile.am
new file mode 100644
index 0000000000..36d32fd900
--- /dev/null
+++ b/orte/mca/ess/alps/Makefile.am
@@ -0,0 +1,53 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+# University Research and Technology
+# Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+# of Tennessee Research Foundation. All rights
+# reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+# University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+# All rights reserved.
+# Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+sources = \
+        ess_alps.h \
+        ess_alps_component.c \
+        ess_alps_module.c \
+        ess_alps_utils.c
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+if MCA_BUILD_orte_ess_alps_DSO
+component_noinst =
+component_install = mca_ess_alps.la
+else
+component_noinst = libmca_ess_alps.la
+component_install =
+endif
+
+mcacomponentdir = $(ompilibdir)
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_ess_alps_la_SOURCES = $(sources)
+mca_ess_alps_la_CPPFLAGS = $(ess_alps_CPPFLAGS) -fno-ident
+mca_ess_alps_la_LDFLAGS = -module -avoid-version $(ess_alps_LDFLAGS)
+# Libraries go in LIBADD (as in the static variant below); linker flags
+# and search paths stay in LDFLAGS.
+mca_ess_alps_la_LIBADD = $(ess_alps_LIBS) \
+        $(ORTE_TOP_BUILDDIR)/orte/mca/common/alps/lib@ORTE_LIB_PREFIX@mca_common_alps.la
+
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_ess_alps_la_SOURCES =$(sources)
+libmca_ess_alps_la_CPPFLAGS = $(ess_alps_CPPFLAGS)
+libmca_ess_alps_la_LDFLAGS = -module -avoid-version $(ess_alps_LDFLAGS)
+libmca_ess_alps_la_LIBADD = $(ess_alps_LIBS)
+
diff --git a/orte/mca/ess/alps/configure.m4 b/orte/mca/ess/alps/configure.m4
new file mode 100644
index 0000000000..6ad187ae3b
--- /dev/null
+++ b/orte/mca/ess/alps/configure.m4
@@ -0,0 +1,37 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+# University Research and Technology
+# Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+# of Tennessee Research Foundation. All rights
+# reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+# University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+# All rights reserved.
+# Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2011 Los Alamos National Security, LLC.
+# All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# MCA_orte_ess_alps_CONFIG([action-if-found], [action-if-not-found])
+# -----------------------------------------------------------
+AC_DEFUN([MCA_orte_ess_alps_CONFIG],[
+    AC_CONFIG_FILES([orte/mca/ess/alps/Makefile])
+
+    ORTE_CHECK_ALPS([ess_alps], [ess_alps_happy="yes"], [ess_alps_happy="no"])
+
+    AS_IF([test "$ess_alps_happy" = "yes"],
+          [$1
+           AC_SUBST([ess_alps_CPPFLAGS])
+           AC_SUBST([ess_alps_LDFLAGS])
+           AC_SUBST([ess_alps_LIBS])],
+          [$2])
+
+])dnl
diff --git a/orte/mca/ess/alps/ess_alps.h b/orte/mca/ess/alps/ess_alps.h
new file mode 100644
index 0000000000..70c4098c6a
--- /dev/null
+++ b/orte/mca/ess/alps/ess_alps.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef ORTE_ESS_ALPS_H
+#define ORTE_ESS_ALPS_H
+
+#include "orte_config.h"
+#include "opal/mca/mca.h"
+#include "orte/mca/ess/ess.h"
+
+#include "alps/alps.h"
+#include "alps/alps_toolAssist.h"
+#include "alps/libalpsutil.h"
+#include "alps/libalpslli.h"
+
+BEGIN_C_DECLS
+
+/*
+ * Component open / close / query
+ */
+int orte_ess_alps_component_open(void);
+int orte_ess_alps_component_close(void);
+int orte_ess_alps_component_query(mca_base_module_t **module, int *priority);
+
+/*
+ * alps component internal utility functions
+ */
+
+int orte_ess_alps_get_first_rank_on_node(int *first_rank);
+int orte_ess_alps_sync_start(void);
+int orte_ess_alps_sync_complete(void);
+
+/*
+ * ESS Alps module
+ */
+extern orte_ess_base_module_t orte_ess_alps_module;
+ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_alps_component;
+
+END_C_DECLS
+
+#endif /* ORTE_ESS_ALPS_H */
diff --git a/orte/mca/ess/alps/ess_alps_component.c b/orte/mca/ess/alps/ess_alps_component.c
new file mode 100644
index 0000000000..830f532776
--- /dev/null
+++ b/orte/mca/ess/alps/ess_alps_component.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2011-2012 Los Alamos National Security, LLC.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * These symbols are in a file by themselves to provide nice linker
+ * semantics. Since linkers generally pull in symbols by object
+ * files, keeping these symbols as the only symbols in this file
+ * prevents utility programs such as "ompi_info" from having to import
+ * entire components just to query their version and parameters.
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+#include "orte/runtime/orte_globals.h"
+#include "orte/util/proc_info.h"
+#include "orte/mca/common/alps/common_alps.h"
+#include "orte/mca/ess/ess.h"
+#include "orte/mca/ess/base/base.h"
+#include "orte/mca/ess/alps/ess_alps.h"
+
+#include <errno.h> /* NOTE(review): header name was lost from this line; errno.h assumed - confirm */
+
+
+/*
+ * Instantiate the public struct with all of our public information
+ * and pointers to our public functions in it
+ */
+orte_ess_base_component_t mca_ess_alps_component = {
+    /* First, the mca_component_t struct containing meta information
+       about the component itself */
+    {
+        ORTE_ESS_BASE_VERSION_3_0_0,
+
+        /* Component name and version */
+        "alps",
+        ORTE_MAJOR_VERSION,
+        ORTE_MINOR_VERSION,
+        ORTE_RELEASE_VERSION,
+
+        /* Component open and close functions */
+        orte_ess_alps_component_open,
+        orte_ess_alps_component_close,
+        orte_ess_alps_component_query
+    },
+    {
+        /* The component is not checkpoint ready */
+        MCA_BASE_METADATA_PARAM_NONE
+    }
+};
+
+int
+orte_ess_alps_component_open(void)
+{
+    return ORTE_SUCCESS;
+}
+
+/*
+ * Offer the alps module (priority 35) only to a non-application
+ * process that orte_common_alps_proc_in_pagg() reports as being in a
+ * PAGG container.  *module and *priority are written on every path.
+ */
+int orte_ess_alps_component_query(mca_base_module_t **module, int *priority)
+{
+    int rc = ORTE_SUCCESS;
+    bool flag = false;
+
+    /* defaults for every path that does not select the module */
+    *priority = 0;
+    *module = NULL;
+
+    /*
+     * don't use the alps ess component if an app proc
+     */
+
+    if (ORTE_PROC_IS_APP) {
+        return ORTE_ERROR;
+    }
+
+    /*
+     * make sure we're in a Cray PAGG container, and that we are also on
+     * a compute node (i.e. we are thought of as an application task by
+     * the cray job kernel module - the thing that creates the PAGG)
+     */
+
+    rc = orte_common_alps_proc_in_pagg(&flag);
+    if ((ORTE_SUCCESS == rc) && flag) {
+        *priority = 35; /* take precedence over base */
+        *module = (mca_base_module_t *) &orte_ess_alps_module;
+    }
+
+    return rc;
+}
+
+int
+orte_ess_alps_component_close(void)
+{
+    return ORTE_SUCCESS;
+}
+
diff --git a/orte/mca/ess/alps/ess_alps_module.c b/orte/mca/ess/alps/ess_alps_module.c
new file mode 100644
index 0000000000..b557b155ad
--- /dev/null
+++ b/orte/mca/ess/alps/ess_alps_module.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2011 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+
+#include "orte/util/show_help.h"
+#include "opal/util/argv.h"
+
+#include "orte/util/proc_info.h"
+#include "orte/mca/errmgr/base/base.h"
+#include "orte/util/name_fns.h"
+#include "orte/util/nidmap.h"
+#include "orte/util/regex.h"
+#include "orte/runtime/orte_globals.h"
+
+#include "orte/mca/ess/ess.h"
+#include "orte/mca/ess/base/base.h"
+#include "orte/mca/ess/alps/ess_alps.h"
+
+#include <string.h> /* strncmp, strlen; NOTE(review): original header name was lost from this line - confirm */
+
+static int alps_set_name(void);
+static int rte_init(void);
+static int rte_finalize(void);
+
+orte_ess_base_module_t orte_ess_alps_module = {
+    rte_init,
+    rte_finalize,
+    orte_ess_base_app_abort,
+    NULL /* ft_event */
+};
+
+/* Local variables */
+static orte_vpid_t starting_vpid = 0;
+
+
+/*
+ * Set this process's name, then run the daemon or tool setup.  The host
+ * list extracted for a daemon is released on every exit path.
+ */
+static int rte_init(void)
+{
+    int ret, i;
+    char *error = NULL;
+    char **hosts = NULL;
+
+    OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
+                         "ess:alps in rte_init"));
+
+    /*
+     * shouldn't have been able to open this ess component if
+     * process is app proc
+     */
+
+    if (ORTE_PROC_IS_APP) {
+        error = "mpi rank invoking alps rte_init";
+        ret = ORTE_ERR_NOT_SUPPORTED;
+        goto fn_fail;
+    }
+
+    /* run the prolog */
+    if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
+        error = "orte_ess_base_std_prolog";
+        goto fn_fail;
+    }
+
+    if (ORTE_SUCCESS != (ret = alps_set_name())) {
+        error = "alps_set_name";
+        goto fn_fail;
+    }
+
+    /*
+     * if I am a daemon, complete my setup using the
+     * default procedure
+     */
+    if (ORTE_PROC_IS_DAEMON) {
+        if (NULL != orte_node_regex) {
+            /* extract the nodes */
+            ret = orte_regex_extract_node_names(orte_node_regex, &hosts);
+            if (ORTE_SUCCESS == ret && NULL == hosts) {
+                /* an empty list is a failure; don't report it as success */
+                ret = ORTE_ERR_NOT_FOUND;
+            }
+            if (ORTE_SUCCESS != ret) {
+                error = "orte_regex_extract_node_names";
+                goto fn_fail;
+            }
+
+            /* find our host in the list */
+            for (i=0; NULL != hosts[i]; i++) {
+                if (0 == strncmp(hosts[i], orte_process_info.nodename,
+                                 strlen(hosts[i]))) {
+                    /* correct our vpid - this is probably not necessary with aprun*/
+                    ORTE_PROC_MY_NAME->vpid = starting_vpid + i;
+                    OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
+                                         "ess:alps reset name to %s",
+                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+                    break;
+                }
+            }
+        }
+        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) {
+            ORTE_ERROR_LOG(ret);
+            error = "orte_ess_base_orted_setup";
+            goto fn_fail;
+        }
+
+        /*
+         * now synchronize with aprun.
+         */
+
+        if (ORTE_SUCCESS != (ret = orte_ess_alps_sync_start())) {
+            error = "orte_ess_alps_sync";
+            goto fn_fail;
+        }
+
+        ret = ORTE_SUCCESS;
+        goto fn_exit;
+    }
+
+    if (ORTE_PROC_IS_TOOL) {
+        /* otherwise, if I am a tool proc, use that procedure */
+        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
+            ORTE_ERROR_LOG(ret);
+            error = "orte_ess_base_tool_setup";
+            goto fn_fail;
+        }
+        /* as a tool, I don't need a nidmap - so just return now */
+        ret = ORTE_SUCCESS;
+        goto fn_exit;
+    }
+
+ fn_exit:
+    /* single release point for the host list, success or failure */
+    if (NULL != hosts) {
+        opal_argv_free(hosts);
+    }
+    return ret;
+
+ fn_fail:
+    if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
+        orte_show_help("help-orte-runtime.txt",
+                       "orte_init:startup:internal-failure",
+                       true, error, ORTE_ERROR_NAME(ret), ret);
+    }
+    goto fn_exit;
+}
+
+static int rte_finalize(void)
+{
+    int ret = ORTE_SUCCESS;
+
+    /* if I am a daemon, finalize using the default procedure */
+    if (ORTE_PROC_IS_DAEMON) {
+        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
+            ORTE_ERROR_LOG(ret);
+            goto fn_exit;
+        }
+
+        /* notify alps that we're done */
+        if (ORTE_SUCCESS != (ret = orte_ess_alps_sync_complete())) {
+            ORTE_ERROR_LOG(ret);
+        }
+
+    } else if (ORTE_PROC_IS_TOOL) {
+        /* otherwise, if I am a tool proc, use that procedure */
+        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
+            ORTE_ERROR_LOG(ret);
+        }
+    }
+
+ fn_exit:
+    return ret;
+}
+
+static int alps_set_name(void)
+{
+    int rc;
+    int rank;
+    orte_jobid_t jobid;
+
+    if (NULL == orte_ess_base_jobid) {
+        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+        return ORTE_ERR_NOT_FOUND;
+    }
+    if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    if (NULL == orte_ess_base_vpid) {
+        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+        return ORTE_ERR_NOT_FOUND;
+    }
+    if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&starting_vpid,
+                                                               orte_ess_base_vpid))) {
+        ORTE_ERROR_LOG(rc);
+        return(rc);
+    }
+
+    ORTE_PROC_MY_NAME->jobid = jobid;
+
+    if (ORTE_SUCCESS != (rc = orte_ess_alps_get_first_rank_on_node(&rank))) {
+        ORTE_ERROR_LOG(rc);
+        return(rc);
+    }
+
+    ORTE_PROC_MY_NAME->vpid = (orte_vpid_t)rank + starting_vpid;
+
+    /* get the num procs as provided in the cmd line param */
+    if (ORTE_SUCCESS != (rc = orte_ess_env_get())) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/ess/alps/ess_alps_utils.c b/orte/mca/ess/alps/ess_alps_utils.c
new file mode 100644
index 0000000000..cc7054638b
--- /dev/null
+++ b/orte/mca/ess/alps/ess_alps_utils.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2011 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
+ * All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include "orte/util/show_help.h" +#include "opal/util/argv.h" + +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/base/base.h" +#include "orte/util/name_fns.h" +#include "orte/util/nidmap.h" +#include "orte/util/regex.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/mca/ess/ess.h" +#include "orte/mca/ess/base/base.h" +#include "orte/mca/ess/alps/ess_alps.h" + +/* + * Use the ALPS placement info for our apid to obtain + * the global rank of the "first" local rank on the node, stored in *first_rank. + * Returns ORTE_SUCCESS, ORTE_ERR_BAD_PARAM if first_rank is NULL, else ORTE_ERROR or an ORTE_ERR_FILE_xxx code. + */ + +int +orte_ess_alps_get_first_rank_on_node(int *first_rank) +{ + int alps_status = 0; + uint64_t apid; + size_t alps_count; + int ret = ORTE_SUCCESS; + int lli_ret = 0, place_ret; + alpsAppLayout_t orted_layout; + + if (first_rank == NULL) { + ret = ORTE_ERR_BAD_PARAM; + goto fn_exit; + } + + /* + * First get our apid; the LLI lock taken here is released at fn_exit_w_lock + */ + + lli_ret = alps_app_lli_lock(); + if (0 != lli_ret) { /* test the lock call's own result; ret is still ORTE_SUCCESS here */ + OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output, + "%s ess:alps: alps_app_lli_lock returned %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret)); + ret = ORTE_ERR_FILE_WRITE_FAILURE; + goto fn_exit; + } + + lli_ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_APID, NULL, 0); + if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) { + OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output, + "%s ess:alps: alps_app_lli_put_request - APID returned %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret)); + ret = ORTE_ERR_FILE_WRITE_FAILURE; + goto fn_exit_w_lock; + } + + lli_ret = alps_app_lli_get_response (&alps_status, &alps_count); + if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) { + OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output, + "%s ess:alps: alps_app_lli_get_response returned %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status)); + ret = ORTE_ERR_FILE_READ_FAILURE; + goto fn_exit_w_lock; + } + + lli_ret = 
alps_app_lli_get_response_bytes (&apid, sizeof(apid)); + if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) { + OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output, + "%s ess:alps: alps_app_lli_get_response_bytes returned %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret)); + ret = ORTE_ERR_FILE_READ_FAILURE; + goto fn_exit_w_lock; + } + + place_ret = alps_get_placement_info(apid, + &orted_layout, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL); + if (1 != place_ret) { + OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output, + "%s ess:alps: alps_get_placement_info returned %d (%s)", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), place_ret, strerror(errno))); + ret = ORTE_ERROR; + goto fn_exit_w_lock; /* the LLI lock is still held here, so release it */ + } + + OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output, + "%s ess:alps: alps_get_placement_info returned %d first pe on node is %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), place_ret, orted_layout.firstPe)); + *first_rank = orted_layout.firstPe; + + fn_exit_w_lock: + lli_ret = alps_app_lli_unlock(); + if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) { + OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output, + "%s ess:alps: alps_app_lli_unlock returned %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret)); + ret = ORTE_ERR_FILE_WRITE_FAILURE; + } + + fn_exit: + return ret; +} + +/* + * Function to check in with apshepherd to say we are a parallel application (sends ALPS_APP_LLI_ALPS_REQ_START under the LLI lock); returns ORTE_SUCCESS or an ORTE_ERR_FILE_xxx code + */ +int +orte_ess_alps_sync_start(void) +{ + int ret = ORTE_SUCCESS; + int lli_ret = 0; + int alps_status = 0; + size_t alps_count; + + lli_ret = alps_app_lli_lock(); + if (0 != lli_ret) { /* test the lock call's own result; ret is still ORTE_SUCCESS here */ + OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output, + "%s ess:alps: alps_app_lli_lock returned %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret)); + ret = ORTE_ERR_FILE_WRITE_FAILURE; + goto fn_exit; + } + + lli_ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_START, NULL, 0); + if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) { + OPAL_OUTPUT_VERBOSE((20, 
orte_ess_base_framework.framework_output, + "%s ess:alps: alps_app_lli_put_request returned %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret)); + ret = ORTE_ERR_FILE_WRITE_FAILURE; + goto fn_exit_w_lock; + } + + lli_ret = alps_app_lli_get_response (&alps_status, &alps_count); + if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) { + OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output, + "%s ess:alps: alps_app_lli_get_response returned %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status)); + ret = ORTE_ERR_FILE_READ_FAILURE; + goto fn_exit_w_lock; + } + + fn_exit_w_lock: + lli_ret = alps_app_lli_unlock(); + if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) { + OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output, + "%s ess:alps: alps_app_lli_unlock returned %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret)); + ret = ORTE_ERR_FILE_WRITE_FAILURE; + } + + fn_exit: + return ret; +} + +/* + * Function to tell ALPS that we are exiting (sends ALPS_APP_LLI_ALPS_REQ_EXITING under the LLI lock); returns ORTE_SUCCESS or an ORTE_ERR_FILE_xxx code + */ + +int +orte_ess_alps_sync_complete(void) +{ + int ret = ORTE_SUCCESS; + int lli_ret = 0; + int alps_status = 0; + size_t alps_count; + + lli_ret = alps_app_lli_lock(); + if (0 != lli_ret) { /* test the lock call's own result; ret is still ORTE_SUCCESS here */ + OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output, + "%s ess:alps: alps_app_lli_lock returned %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret)); + ret = ORTE_ERR_FILE_WRITE_FAILURE; + goto fn_exit; + } + + lli_ret = alps_app_lli_put_request(ALPS_APP_LLI_ALPS_REQ_EXITING, NULL, 0); + if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) { + OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output, + "%s ess:alps: alps_app_lli_put_request returned %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret)); + ret = ORTE_ERR_FILE_WRITE_FAILURE; + goto fn_exit_w_lock; + } + + lli_ret = alps_app_lli_get_response (&alps_status, &alps_count); + if (ALPS_APP_LLI_ALPS_STAT_OK != alps_status) { + OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output, + "%s ess:alps: 
alps_app_lli_get_response returned %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), alps_status)); + ret = ORTE_ERR_FILE_READ_FAILURE; + goto fn_exit_w_lock; + } + + fn_exit_w_lock: + lli_ret = alps_app_lli_unlock(); + if (ALPS_APP_LLI_ALPS_STAT_OK != lli_ret) { + OPAL_OUTPUT_VERBOSE((20, orte_ess_base_framework.framework_output, + "%s ess:alps: alps_app_lli_unlock returned %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lli_ret)); + ret = ORTE_ERR_FILE_WRITE_FAILURE; + } + + fn_exit: + return ret; +} + + diff --git a/orte/mca/odls/alps/Makefile.am b/orte/mca/odls/alps/Makefile.am index 3c907144e6..145f5a384e 100644 --- a/orte/mca/odls/alps/Makefile.am +++ b/orte/mca/odls/alps/Makefile.am @@ -44,7 +44,8 @@ mcacomponent_LTLIBRARIES = $(component_install) mca_odls_alps_la_SOURCES = $(sources) mca_odls_alps_la_CPPFLAGS = $(odls_alps_CPPFLAGS) mca_odls_alps_la_LDFLAGS = -module -avoid-version $(odls_alps_LDFLAGS) -mca_odls_alps_la_LIBADD = $(odls_alps_LIBS) +mca_odls_alps_la_LIBADD = $(odls_alps_LIBS) \ + $(ORTE_TOP_BUILDDIR)/orte/mca/common/alps/lib@ORTE_LIB_PREFIX@mca_common_alps.la noinst_LTLIBRARIES = $(component_noinst) libmca_odls_alps_la_SOURCES =$(sources) diff --git a/orte/mca/odls/alps/configure.m4 b/orte/mca/odls/alps/configure.m4 index a5d710c1d8..b8b7bad20f 100644 --- a/orte/mca/odls/alps/configure.m4 +++ b/orte/mca/odls/alps/configure.m4 @@ -25,29 +25,13 @@ AC_DEFUN([MCA_orte_odls_alps_CONFIG],[ AC_CONFIG_FILES([orte/mca/odls/alps/Makefile]) - odls_alps_lli_happy="no" - odls_alps_util_happy="no" + ORTE_CHECK_ALPS([odls_alps], [odls_alps_happy="yes"], [odls_alps_happy="no"]) - PKG_CHECK_MODULES([CRAY_ALPS_LLI], [cray-alpslli], - [odls_alps_CPPFLAGS=$CRAY_ALPS_LLI_CFLAGS - odls_alps_LDFLAGS=$CRAY_ALPS_LLI_LIBS - odls_alps_LIBS=$CRAY_ALPS_LLI_LIBS - odls_alps_lli_happy="yes"], - [AC_MSG_RESULT([no])]) - - PKG_CHECK_MODULES([CRAY_ALPS_UTIL], [cray-alpsutil], - [odls_alps_CPPFLAGS="$odls_alps_CPPFLAGS $CRAY_ALPS_UTIL_CFLAGS" - odls_alps_LDFLAGS="$odls_alps_LDFLAGS 
$CRAY_ALPS_UTIL_LIBS" - odls_alps_LIBS="$odls_alps_LIBS $CRAY_ALPS_LLI_LIBS" - odls_alps_util_happy="yes"], - [AC_MSG_RESULT([no])]) - - AS_IF([test "$odls_alps_lli_happy" = "yes" -a "$odls_alps_util_happy" = "yes"], + AS_IF([test "$odls_alps_happy" = "yes"], [$1 AC_SUBST([odls_alps_CPPFLAGS]) AC_SUBST([odls_alps_LDFLAGS]) AC_SUBST([odls_alps_LIBS])], [$2]) - ])dnl diff --git a/orte/mca/odls/alps/odls_alps_component.c b/orte/mca/odls/alps/odls_alps_component.c index a19f276e7e..b5dc275879 100644 --- a/orte/mca/odls/alps/odls_alps_component.c +++ b/orte/mca/odls/alps/odls_alps_component.c @@ -37,6 +37,7 @@ #include "opal/mca/mca.h" #include "opal/mca/base/base.h" +#include "orte/mca/common/alps/common_alps.h" #include "orte/mca/odls/odls.h" #include "orte/mca/odls/base/odls_private.h" #include "orte/mca/odls/alps/odls_alps.h" @@ -77,9 +78,7 @@ int orte_odls_alps_component_open(void) int orte_odls_alps_component_query(mca_base_module_t **module, int *priority) { int rc = ORTE_SUCCESS; - const char proc_job_file[]="/proc/job"; - FILE *fd = NULL, *fd_task_is_app = NULL; - char task_is_app_fname[PATH_MAX]; + bool flag; /* * make sure we're in a daemon process @@ -97,26 +96,10 @@ int orte_odls_alps_component_query(mca_base_module_t **module, int *priority) * the cray job kernel module - the thing that creates the PAGG */ - /* disqualify ourselves if not running in a Cray PAGG container */ - fd = fopen(proc_job_file, "r"); - if (fd == NULL) { - *priority = 0; - *module = NULL; - rc = ORTE_ERROR; - } else { - snprintf(task_is_app_fname,sizeof(task_is_app_fname), - "/proc/self/task/%ld/task_is_app",syscall(SYS_gettid)); - fd_task_is_app = fopen(task_is_app_fname, "r"); - if (fd_task_is_app != NULL) { /* okay we're in a PAGG container, - and we are an app task (not just a process - running on a mom node, for example), - so we should give cray pmi a shot. 
*/ - *priority = 10; /* take precendence over base */ - *module = (mca_base_module_t *) &orte_odls_alps_module; - fclose(fd_task_is_app); - rc = orte_odls_alps_get_rdma_creds(); - } - fclose(fd); + rc = orte_common_alps_proc_in_pagg(&flag); + if ((ORTE_SUCCESS == rc) && flag) { + *priority = 10; /* take precendence over base */ + *module = (mca_base_module_t *) &orte_odls_alps_module; } return rc; diff --git a/orte/mca/odls/alps/odls_alps_module.c b/orte/mca/odls/alps/odls_alps_module.c index c4007d2cf3..50dbdf72a8 100644 --- a/orte/mca/odls/alps/odls_alps_module.c +++ b/orte/mca/odls/alps/odls_alps_module.c @@ -740,6 +740,15 @@ int orte_odls_alps_launch_local_procs(opal_buffer_t *data) return rc; } + /* get the RDMA credentials and push them into the launch environment */ + + if (ORTE_SUCCESS != (rc = orte_odls_alps_get_rdma_creds())) {; + OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output, + "%s odls:alps:launch:failed to get GNI rdma credentials %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc))); + return rc; + } + /* launch the local procs */ ORTE_ACTIVATE_LOCAL_LAUNCH(job, odls_alps_fork_local_proc); diff --git a/orte/mca/odls/alps/odls_alps_utils.c b/orte/mca/odls/alps/odls_alps_utils.c index cc02225116..8236038d14 100644 --- a/orte/mca/odls/alps/odls_alps_utils.c +++ b/orte/mca/odls/alps/odls_alps_utils.c @@ -231,12 +231,6 @@ int orte_odls_alps_get_rdma_creds(void) goto fn_exit; } -#if 0 - fprintf(stderr,"apid = 0x%lx ptag0 %d cookie0 0x%x(%d) ptag1 %d cookie1 0x%x(%d)\n",apid, - rdmacred_buf[0].ptag,rdmacred_buf[0].cookie,rdmacred_buf[0].cookie, - rdmacred_buf[1].ptag,rdmacred_buf[1].cookie,rdmacred_buf[1].cookie); -#endif - } fn_exit: