diff --git a/ompi/mca/pubsub/pmi/pubsub_pmi_component.c b/ompi/mca/pubsub/pmi/pubsub_pmi_component.c index c36db09ee5..15a650d0df 100644 --- a/ompi/mca/pubsub/pmi/pubsub_pmi_component.c +++ b/ompi/mca/pubsub/pmi/pubsub_pmi_component.c @@ -115,7 +115,8 @@ static bool pmi_startup(void) static int pubsub_pmi_component_query(mca_base_module_t **module, int *priority) { /* for now, only use PMI when direct launched */ - if (ORTE_PROC_IS_MPI && + if (NULL == orte_process_info.my_hnp_uri && + ORTE_PROC_IS_MPI && pmi_startup()) { /* if PMI is available, use it */ *priority = my_priority; diff --git a/orte/mca/ess/alps/configure.m4 b/orte/mca/ess/alps/configure.m4 index e2662e4e6d..72bdc38255 100644 --- a/orte/mca/ess/alps/configure.m4 +++ b/orte/mca/ess/alps/configure.m4 @@ -48,35 +48,15 @@ AC_DEFUN([MCA_orte_ess_alps_CONFIG],[ [orte_mca_ess_alps_have_cnos=1], [orte_mca_ess_alps_have_cnos=0])]) - dnl now check for PMI support - ORTE_CHECK_PMI([ess_alps], - [orte_mca_ess_alps_have_pmi=1], - [orte_mca_ess_alps_have_pmi=0]) - dnl was ess alps requested? ORTE_CHECK_ALPS([ess_alps], [orte_mca_ess_alps_happy="yes"], [orte_mca_ess_alps_happy="no"]) - dnl cannot continue if we don't have CNOS or PMI - AS_IF([test "$orte_mca_ess_alps_happy" = "yes" -a "$orte_mca_ess_alps_have_cnos" = "0" -a "$orte_mca_ess_alps_have_pmi" = "0"], - [AC_MSG_WARN([Alps support requested (via --with-alps) but adequate support was not found.]) - AC_MSG_ERROR([Cannot continue.])]) - - dnl cannot continue if we have both CNOS and PMI. this will probably - dnl never happen, but it can't hurt to also check for this case. 
- AS_IF([test "$orte_mca_ess_alps_happy" = "yes" -a "$orte_mca_ess_alps_have_cnos" = "1" -a "$orte_mca_ess_alps_have_pmi" = "1"], - [AC_MSG_WARN([Alps support requested (via --with-alps) but CNOS and PMI support was found.]) - AC_MSG_ERROR([Cannot continue.])]) - AC_DEFINE_UNQUOTED([ORTE_MCA_ESS_ALPS_HAVE_CNOS], [$orte_mca_ess_alps_have_cnos], [Whether we have CNOS support in alps ess or not]) - AC_DEFINE_UNQUOTED([ORTE_MCA_ESS_ALPS_HAVE_PMI], - [$orte_mca_ess_alps_have_pmi], - [Whether we have PMI support in alps ess or not]) - AS_IF([test "$orte_mca_ess_alps_happy" = "yes"], [$1], [$2]) diff --git a/orte/mca/ess/alps/ess_alps_component.c b/orte/mca/ess/alps/ess_alps_component.c index 13dcd725b4..1a798c0428 100644 --- a/orte/mca/ess/alps/ess_alps_component.c +++ b/orte/mca/ess/alps/ess_alps_component.c @@ -5,14 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2011 Los Alamos National Security, LLC. + * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker @@ -59,22 +61,34 @@ orte_ess_base_component_t mca_ess_alps_component = { } }; - int orte_ess_alps_component_open(void) { return ORTE_SUCCESS; } - int orte_ess_alps_component_query(mca_base_module_t **module, int *priority) { +#if ORTE_MCA_ESS_ALPS_HAVE_CNOS == 1 *priority = 35; *module = (mca_base_module_t *)&orte_ess_alps_module; return ORTE_SUCCESS; +#else + /* if i'm a daemon, then only i can safely select this component if + * PMI_GNI_LOC_ADDR exists */ + if (NULL != getenv("PMI_GNI_LOC_ADDR") && + ORTE_PROC_IS_DAEMON) { + *priority = 35; + *module = (mca_base_module_t *)&orte_ess_alps_module; + return ORTE_SUCCESS; + } + /* can't be selected, so disqualify myself */ + *priority = -1; + *module = NULL; + return ORTE_ERROR; +#endif /* ORTE_MCA_ESS_ALPS_HAVE_CNOS == 1 */ } - int orte_ess_alps_component_close(void) { diff --git a/orte/mca/ess/alps/ess_alps_module.c b/orte/mca/ess/alps/ess_alps_module.c index 66184c2263..f109157acd 100644 --- a/orte/mca/ess/alps/ess_alps_module.c +++ b/orte/mca/ess/alps/ess_alps_module.c @@ -29,8 +29,6 @@ # elif defined(HAVE_CATAMOUNT_CNOS_MPI_OS_H) # include "catamount/cnos_mpi_os.h" # endif -#elif ORTE_MCA_ESS_ALPS_HAVE_PMI == 1 -# include "pmi.h" #endif #include "orte/util/show_help.h" @@ -47,6 +45,8 @@ #include "orte/mca/ess/base/base.h" #include "orte/mca/ess/alps/ess_alps.h" +#include + static int alps_set_name(void); static int rte_init(void); static int rte_finalize(void); @@ -78,23 +78,25 @@ get_vpid(orte_vpid_t *outvp, #if ORTE_MCA_ESS_ALPS_HAVE_CNOS == 1 *outvp = (orte_vpid_t)cnos_get_rank() + start_vpid; return ORTE_SUCCESS; -#else /* using PMI */ - /* TODO SKG - PMI utility functions should be in a common area */ - int rank; - PMI_BOOL pmi_initialized; +#else + /* Cray XE6 Notes: + * using PMI_GNI_LOC_ADDR to set vpid. 
+ */ + int rank = 0; + char *env; - if (PMI_SUCCESS != PMI_Initialized(&pmi_initialized)) { + if (NULL == (env = getenv("PMI_GNI_LOC_ADDR"))) { + OPAL_OUTPUT_VERBOSE((0, orte_ess_base_output, + "PMI_GNI_LOC_ADDR not found, cannot continue\n")); ORTE_ERROR_LOG(ORTE_ERROR); return ORTE_ERROR; } - if (PMI_FALSE == pmi_initialized) { - int tmp; - if (PMI_SUCCESS != PMI_Init(&tmp)) { - ORTE_ERROR_LOG(ORTE_ERROR); - return ORTE_ERROR; - } - } - if (PMI_SUCCESS != PMI_Get_rank(&rank)) { + errno = 0; + rank = (int)strtol(env, (char **)NULL, 10); + if (0 != errno) { + OPAL_OUTPUT_VERBOSE((0, orte_ess_base_output, + "strtol error detected at %s:%d\n", __FILE__, + __LINE__)); ORTE_ERROR_LOG(ORTE_ERROR); return ORTE_ERROR; } diff --git a/orte/mca/grpcomm/pmi/grpcomm_pmi_component.c b/orte/mca/grpcomm/pmi/grpcomm_pmi_component.c index 17796b6f82..ed7d4b9c70 100644 --- a/orte/mca/grpcomm/pmi/grpcomm_pmi_component.c +++ b/orte/mca/grpcomm/pmi/grpcomm_pmi_component.c @@ -122,7 +122,9 @@ static bool pmi_startup(void) int orte_grpcomm_pmi_component_query(mca_base_module_t **module, int *priority) { - if (ORTE_PROC_IS_MPI && + /* only use PMI when direct launched */ + if (NULL == orte_process_info.my_hnp_uri && + ORTE_PROC_IS_MPI && pmi_startup()) { /* if PMI is available, make it available for use by MPI procs */ *priority = my_priority; diff --git a/orte/test/system/getenv_pmi.c b/orte/test/system/getenv_pmi.c new file mode 100644 index 0000000000..698863953e --- /dev/null +++ b/orte/test/system/getenv_pmi.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2011 Los Alamos National Security, LLC. + * All rights reserved. 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "pmi.h"
+
+/* PMI smoke test: initializes PMI, queries the process group size, this
+ * process's rank and its clique ranks; the process whose rank is the first
+ * clique rank (presumably the lowest local rank) prints its environment.
+ * Exit status is EXIT_SUCCESS or EXIT_FAILURE. Useful debug env var: PMI_DEBUG
+ */
+
+int main(int argc, char **argv, char **envp)
+{
+    int pmi_rank = -1;
+    int pmi_process_group_size = -1;
+    int num_local_procs = 0;
+    int *local_rank_ids = NULL;
+    int spawned = PMI_FALSE;
+    int rc = EXIT_FAILURE;
+    const char *err = NULL;
+    PMI_BOOL pmi_initialized = PMI_FALSE;
+
+    /* sanity: PMI must answer and must not already be initialized */
+    if (PMI_SUCCESS != PMI_Initialized(&pmi_initialized) ||
+        PMI_TRUE == pmi_initialized) {
+        fprintf(stderr, "=== ERROR: PMI sanity failure\n");
+        return EXIT_FAILURE;
+    }
+    if (PMI_SUCCESS != PMI_Init(&spawned)) {
+        err = "PMI_Init failure!";
+        goto done;
+    }
+    pmi_initialized = PMI_TRUE; /* so the done: path finalizes PMI */
+    if (PMI_SUCCESS != PMI_Get_size(&pmi_process_group_size)) {
+        err = "PMI_Get_size failure!";
+        goto done;
+    }
+    if (PMI_SUCCESS != PMI_Get_rank(&pmi_rank)) {
+        err = "PMI_Get_rank failure!";
+        goto done;
+    }
+    if (PMI_SUCCESS != PMI_Get_clique_size(&num_local_procs) ||
+        num_local_procs < 1) { /* need >= 1 entry to read local_rank_ids[0] */
+        err = "PMI_Get_clique_size failure!";
+        goto done;
+    }
+    if (NULL == (local_rank_ids = calloc(num_local_procs, sizeof(int)))) {
+        err = "out of resources";
+        goto done;
+    }
+    if (PMI_SUCCESS != PMI_Get_clique_ranks(local_rank_ids, num_local_procs)) {
+        err = "PMI_Get_clique_ranks failure!";
+        goto done;
+    }
+    /* lowest local rank will print env info and tag its output */
+    if (pmi_rank == local_rank_ids[0]) {
+        for (; NULL != envp && NULL != *envp; ++envp) {
+            printf("===[%d]: %s\n", pmi_rank, *envp);
+        }
+    }
+
+    rc = EXIT_SUCCESS;
+
+done:
+    free(local_rank_ids);
+    if (PMI_TRUE == pmi_initialized && PMI_SUCCESS != PMI_Finalize()) {
+        err = (NULL != err) ? err : "PMI_Finalize failure!"; /* keep 1st error */
+        rc = EXIT_FAILURE;
+    }
+    if (NULL != err) {
+        fprintf(stderr, "=== ERROR [rank:%d] %s\n", pmi_rank, err);
+    }
+    return rc;
+}