1
1

Only use the PMI grpcomm and pubsub components in the direct-launch case. Use a PMI environment variable to set up the vpid in ess alps on Cray XE systems. Add PMI test code.

This commit was SVN r25447.
Этот коммит содержится в:
Samuel Gutierrez 2011-11-06 17:28:40 +00:00
родитель 34f0a27cb6
Коммит e03bc93fb7
6 изменённых файлов: 123 добавлений и 43 удалений

Просмотреть файл

@ -115,7 +115,8 @@ static bool pmi_startup(void)
static int pubsub_pmi_component_query(mca_base_module_t **module, int *priority)
{
/* for now, only use PMI when direct launched */
if (ORTE_PROC_IS_MPI &&
if (NULL == orte_process_info.my_hnp_uri &&
ORTE_PROC_IS_MPI &&
pmi_startup()) {
/* if PMI is available, use it */
*priority = my_priority;

Просмотреть файл

@ -48,35 +48,15 @@ AC_DEFUN([MCA_orte_ess_alps_CONFIG],[
[orte_mca_ess_alps_have_cnos=1],
[orte_mca_ess_alps_have_cnos=0])])
dnl now check for PMI support
ORTE_CHECK_PMI([ess_alps],
[orte_mca_ess_alps_have_pmi=1],
[orte_mca_ess_alps_have_pmi=0])
dnl was ess alps requested?
ORTE_CHECK_ALPS([ess_alps],
[orte_mca_ess_alps_happy="yes"],
[orte_mca_ess_alps_happy="no"])
dnl cannot continue if we don't have CNOS or PMI
AS_IF([test "$orte_mca_ess_alps_happy" = "yes" -a "$orte_mca_ess_alps_have_cnos" = "0" -a "$orte_mca_ess_alps_have_pmi" = "0"],
[AC_MSG_WARN([Alps support requested (via --with-alps) but adequate support was not found.])
AC_MSG_ERROR([Cannot continue.])])
dnl cannot continue if we have both CNOS and PMI. this will probably
dnl never happen, but it can't hurt to also check for this case.
AS_IF([test "$orte_mca_ess_alps_happy" = "yes" -a "$orte_mca_ess_alps_have_cnos" = "1" -a "$orte_mca_ess_alps_have_pmi" = "1"],
[AC_MSG_WARN([Alps support requested (via --with-alps) but CNOS and PMI support was found.])
AC_MSG_ERROR([Cannot continue.])])
AC_DEFINE_UNQUOTED([ORTE_MCA_ESS_ALPS_HAVE_CNOS],
[$orte_mca_ess_alps_have_cnos],
[Whether we have CNOS support in alps ess or not])
AC_DEFINE_UNQUOTED([ORTE_MCA_ESS_ALPS_HAVE_PMI],
[$orte_mca_ess_alps_have_pmi],
[Whether we have PMI support in alps ess or not])
AS_IF([test "$orte_mca_ess_alps_happy" = "yes"],
[$1],
[$2])

Просмотреть файл

@ -9,6 +9,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -59,22 +61,34 @@ orte_ess_base_component_t mca_ess_alps_component = {
}
};
/*
 * Open hook for the alps ess component.
 *
 * Performs no work and unconditionally returns ORTE_SUCCESS.
 */
int orte_ess_alps_component_open(void)
{
    return ORTE_SUCCESS;
}
/*
 * Selection query for the alps ess component.
 *
 * On success, stores the alps module in *module and 35 in *priority and
 * returns ORTE_SUCCESS.  When the component cannot be selected, stores
 * NULL in *module and -1 in *priority and returns ORTE_ERROR.
 *
 * When built with CNOS support the component is always selectable.
 * Otherwise it is selectable only by a daemon whose environment contains
 * PMI_GNI_LOC_ADDR.
 */
int orte_ess_alps_component_query(mca_base_module_t **module, int *priority)
{
#if ORTE_MCA_ESS_ALPS_HAVE_CNOS != 1
    /* no CNOS: bail out unless PMI_GNI_LOC_ADDR is set and we are a daemon */
    if (NULL == getenv("PMI_GNI_LOC_ADDR") ||
        !(ORTE_PROC_IS_DAEMON)) {
        /* can't be selected, so disqualify myself */
        *module = NULL;
        *priority = -1;
        return ORTE_ERROR;
    }
#endif /* ORTE_MCA_ESS_ALPS_HAVE_CNOS != 1 */

    *module = (mca_base_module_t *)&orte_ess_alps_module;
    *priority = 35;
    return ORTE_SUCCESS;
}
int
orte_ess_alps_component_close(void)
{

Просмотреть файл

@ -29,8 +29,6 @@
# elif defined(HAVE_CATAMOUNT_CNOS_MPI_OS_H)
# include "catamount/cnos_mpi_os.h"
# endif
#elif ORTE_MCA_ESS_ALPS_HAVE_PMI == 1
# include "pmi.h"
#endif
#include "orte/util/show_help.h"
@ -47,6 +45,8 @@
#include "orte/mca/ess/base/base.h"
#include "orte/mca/ess/alps/ess_alps.h"
#include <errno.h>
static int alps_set_name(void);
static int rte_init(void);
static int rte_finalize(void);
@ -78,23 +78,25 @@ get_vpid(orte_vpid_t *outvp,
#if ORTE_MCA_ESS_ALPS_HAVE_CNOS == 1
*outvp = (orte_vpid_t)cnos_get_rank() + start_vpid;
return ORTE_SUCCESS;
#else /* using PMI */
/* TODO SKG - PMI utility functions should be in a common area */
int rank;
PMI_BOOL pmi_initialized;
#else
/* Cray XE6 Notes:
* using PMI_GNI_LOC_ADDR to set vpid.
*/
int rank = 0;
char *env;
if (PMI_SUCCESS != PMI_Initialized(&pmi_initialized)) {
if (NULL == (env = getenv("PMI_GNI_LOC_ADDR"))) {
OPAL_OUTPUT_VERBOSE((0, orte_ess_base_output,
"PMI_GNI_LOC_ADDR not found, cannot continue\n"));
ORTE_ERROR_LOG(ORTE_ERROR);
return ORTE_ERROR;
}
if (PMI_FALSE == pmi_initialized) {
int tmp;
if (PMI_SUCCESS != PMI_Init(&tmp)) {
ORTE_ERROR_LOG(ORTE_ERROR);
return ORTE_ERROR;
}
}
if (PMI_SUCCESS != PMI_Get_rank(&rank)) {
errno = 0;
rank = (int)strtol(env, (char **)NULL, 10);
if (0 != errno) {
OPAL_OUTPUT_VERBOSE((0, orte_ess_base_output,
"strtol error detected at %s:%d\n", __FILE__,
__LINE__));
ORTE_ERROR_LOG(ORTE_ERROR);
return ORTE_ERROR;
}

Просмотреть файл

@ -122,7 +122,9 @@ static bool pmi_startup(void)
int orte_grpcomm_pmi_component_query(mca_base_module_t **module, int *priority)
{
if (ORTE_PROC_IS_MPI &&
/* only use PMI when direct launched */
if (NULL == orte_process_info.my_hnp_uri &&
ORTE_PROC_IS_MPI &&
pmi_startup()) {
/* if PMI is available, make it available for use by MPI procs */
*priority = my_priority;

81
orte/test/system/getenv_pmi.c Обычный файл
Просмотреть файл

@ -0,0 +1,81 @@
/*
* Copyright (c) 2011 Los Alamos National Security, LLC.
* All rights reserved.
*/
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include "pmi.h"
/* NOTES
*
* useful debug environment variables:
* PMI_DEBUG
*/
/*
 * PMI smoke test.
 *
 * Initializes PMI, queries the process group size, this process's rank,
 * and the ranks in the local clique.  The process whose rank matches the
 * first entry returned by PMI_Get_clique_ranks prints every environment
 * variable, one per line, tagged with its rank.
 *
 * argc/argv are unused; envp is the environment to print.
 *
 * Returns EXIT_SUCCESS when every PMI call (including PMI_Finalize)
 * succeeds, EXIT_FAILURE otherwise.  Failures are reported on stderr.
 */
int main(int argc, char **argv, char **envp)
{
    int pmi_rank = -1;
    int pmi_process_group_size = -1;
    int num_local_procs = 0;
    int *local_rank_ids = NULL;
    int spawned = PMI_FALSE;
    int rc = EXIT_FAILURE;
    const char *err = NULL;
    PMI_BOOL pmi_initialized = PMI_FALSE;

    (void)argc;
    (void)argv;

    /* sanity: the query must work and PMI must not be initialized yet */
    if (PMI_SUCCESS != PMI_Initialized(&pmi_initialized) ||
        PMI_TRUE == pmi_initialized) {
        fprintf(stderr, "=== ERROR: PMI sanity failure\n");
        return EXIT_FAILURE;
    }
    if (PMI_SUCCESS != PMI_Init(&spawned)) {
        err = "PMI_Init failure!";
        goto done;
    }
    /* record that we initialized PMI so the cleanup path finalizes it */
    pmi_initialized = PMI_TRUE;

    if (PMI_SUCCESS != PMI_Get_size(&pmi_process_group_size)) {
        err = "PMI_Get_size failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_Get_rank(&pmi_rank)) {
        err = "PMI_Get_rank failure!";
        goto done;
    }
    if (PMI_SUCCESS != PMI_Get_clique_size(&num_local_procs)) {
        err = "PMI_Get_clique_size failure!";
        goto done;
    }
    /* a non-positive size would make local_rank_ids[0] below invalid */
    if (num_local_procs <= 0) {
        err = "PMI_Get_clique_size returned a non-positive size!";
        goto done;
    }
    local_rank_ids = calloc((size_t)num_local_procs, sizeof *local_rank_ids);
    if (NULL == local_rank_ids) {
        err = "out of resources";
        goto done;
    }
    if (PMI_SUCCESS != PMI_Get_clique_ranks(local_rank_ids, num_local_procs)) {
        err = "PMI_Get_clique_ranks failure!";
        goto done;
    }
    /* the first listed local rank prints env info and tags its output */
    if (pmi_rank == local_rank_ids[0]) {
        for (; NULL != envp && NULL != *envp; ++envp) {
            printf("===[%d]: %s\n", pmi_rank, *envp);
        }
    }
    rc = EXIT_SUCCESS;

done:
    free(local_rank_ids);
    local_rank_ids = NULL;
    if (PMI_TRUE == pmi_initialized) {
        if (PMI_SUCCESS != PMI_Finalize()) {
            /* a finalize failure fails the test but must not hide an
             * earlier, more specific error */
            rc = EXIT_FAILURE;
            if (NULL == err) {
                err = "PMI_Finalize failure!";
            }
        }
    }
    if (NULL != err) {
        fprintf(stderr, "=== ERROR [rank:%d] %s\n", pmi_rank, err);
    }
    return rc;
}