Only use the PMI grpcomm and pubsub components in the direct-launch case. Use a PMI environment variable to set up the vpid in the ALPS ess component on Cray XE systems. Add PMI test code.
This commit was SVN r25447.
Этот коммит содержится в:
родитель
34f0a27cb6
Коммит
e03bc93fb7
@ -115,7 +115,8 @@ static bool pmi_startup(void)
|
||||
static int pubsub_pmi_component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
/* for now, only use PMI when direct launched */
|
||||
if (ORTE_PROC_IS_MPI &&
|
||||
if (NULL == orte_process_info.my_hnp_uri &&
|
||||
ORTE_PROC_IS_MPI &&
|
||||
pmi_startup()) {
|
||||
/* if PMI is available, use it */
|
||||
*priority = my_priority;
|
||||
|
@ -48,35 +48,15 @@ AC_DEFUN([MCA_orte_ess_alps_CONFIG],[
|
||||
[orte_mca_ess_alps_have_cnos=1],
|
||||
[orte_mca_ess_alps_have_cnos=0])])
|
||||
|
||||
dnl now check for PMI support
|
||||
ORTE_CHECK_PMI([ess_alps],
|
||||
[orte_mca_ess_alps_have_pmi=1],
|
||||
[orte_mca_ess_alps_have_pmi=0])
|
||||
|
||||
dnl was ess alps requested?
|
||||
ORTE_CHECK_ALPS([ess_alps],
|
||||
[orte_mca_ess_alps_happy="yes"],
|
||||
[orte_mca_ess_alps_happy="no"])
|
||||
|
||||
dnl cannot continue if we don't have CNOS or PMI
|
||||
AS_IF([test "$orte_mca_ess_alps_happy" = "yes" -a "$orte_mca_ess_alps_have_cnos" = "0" -a "$orte_mca_ess_alps_have_pmi" = "0"],
|
||||
[AC_MSG_WARN([Alps support requested (via --with-alps) but adequate support was not found.])
|
||||
AC_MSG_ERROR([Cannot continue.])])
|
||||
|
||||
dnl cannot continue if we have both CNOS and PMI. this will probably
|
||||
dnl never happen, but it can't hurt to also check for this case.
|
||||
AS_IF([test "$orte_mca_ess_alps_happy" = "yes" -a "$orte_mca_ess_alps_have_cnos" = "1" -a "$orte_mca_ess_alps_have_pmi" = "1"],
|
||||
[AC_MSG_WARN([Alps support requested (via --with-alps) but CNOS and PMI support was found.])
|
||||
AC_MSG_ERROR([Cannot continue.])])
|
||||
|
||||
AC_DEFINE_UNQUOTED([ORTE_MCA_ESS_ALPS_HAVE_CNOS],
|
||||
[$orte_mca_ess_alps_have_cnos],
|
||||
[Whether we have CNOS support in alps ess or not])
|
||||
|
||||
AC_DEFINE_UNQUOTED([ORTE_MCA_ESS_ALPS_HAVE_PMI],
|
||||
[$orte_mca_ess_alps_have_pmi],
|
||||
[Whether we have PMI support in alps ess or not])
|
||||
|
||||
AS_IF([test "$orte_mca_ess_alps_happy" = "yes"],
|
||||
[$1],
|
||||
[$2])
|
||||
|
@ -9,6 +9,8 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -59,22 +61,34 @@ orte_ess_base_component_t mca_ess_alps_component = {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
int
|
||||
orte_ess_alps_component_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Component query hook for the ALPS ess component.
 *
 * On success, stores the ALPS ess module in *module and 35 in *priority,
 * and returns ORTE_SUCCESS.  When the component cannot be used, stores
 * NULL / -1 and returns ORTE_ERROR.
 *
 * Built with CNOS support, the component is always offered.  Otherwise it
 * is offered only to daemons that have PMI_GNI_LOC_ADDR set in their
 * environment.
 */
int orte_ess_alps_component_query(mca_base_module_t **module, int *priority)
{
#if ORTE_MCA_ESS_ALPS_HAVE_CNOS == 1
    /* CNOS build: no runtime precondition to check */
    *priority = 35;
    *module = (mca_base_module_t *)&orte_ess_alps_module;
    return ORTE_SUCCESS;
#else
    /* without PMI_GNI_LOC_ADDR, or when not a daemon, this component
     * cannot be selected safely, so disqualify it */
    if (NULL == getenv("PMI_GNI_LOC_ADDR") || !(ORTE_PROC_IS_DAEMON)) {
        *priority = -1;
        *module = NULL;
        return ORTE_ERROR;
    }

    /* daemon with PMI_GNI_LOC_ADDR available: offer the module */
    *priority = 35;
    *module = (mca_base_module_t *)&orte_ess_alps_module;
    return ORTE_SUCCESS;
#endif /* ORTE_MCA_ESS_ALPS_HAVE_CNOS == 1 */
}
|
||||
|
||||
|
||||
int
|
||||
orte_ess_alps_component_close(void)
|
||||
{
|
||||
|
@ -29,8 +29,6 @@
|
||||
# elif defined(HAVE_CATAMOUNT_CNOS_MPI_OS_H)
|
||||
# include "catamount/cnos_mpi_os.h"
|
||||
# endif
|
||||
#elif ORTE_MCA_ESS_ALPS_HAVE_PMI == 1
|
||||
# include "pmi.h"
|
||||
#endif
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
@ -47,6 +45,8 @@
|
||||
#include "orte/mca/ess/base/base.h"
|
||||
#include "orte/mca/ess/alps/ess_alps.h"
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
static int alps_set_name(void);
|
||||
static int rte_init(void);
|
||||
static int rte_finalize(void);
|
||||
@ -78,23 +78,25 @@ get_vpid(orte_vpid_t *outvp,
|
||||
#if ORTE_MCA_ESS_ALPS_HAVE_CNOS == 1
|
||||
*outvp = (orte_vpid_t)cnos_get_rank() + start_vpid;
|
||||
return ORTE_SUCCESS;
|
||||
#else /* using PMI */
|
||||
/* TODO SKG - PMI utility functions should be in a common area */
|
||||
int rank;
|
||||
PMI_BOOL pmi_initialized;
|
||||
#else
|
||||
/* Cray XE6 Notes:
|
||||
* using PMI_GNI_LOC_ADDR to set vpid.
|
||||
*/
|
||||
int rank = 0;
|
||||
char *env;
|
||||
|
||||
if (PMI_SUCCESS != PMI_Initialized(&pmi_initialized)) {
|
||||
if (NULL == (env = getenv("PMI_GNI_LOC_ADDR"))) {
|
||||
OPAL_OUTPUT_VERBOSE((0, orte_ess_base_output,
|
||||
"PMI_GNI_LOC_ADDR not found, cannot continue\n"));
|
||||
ORTE_ERROR_LOG(ORTE_ERROR);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
if (PMI_FALSE == pmi_initialized) {
|
||||
int tmp;
|
||||
if (PMI_SUCCESS != PMI_Init(&tmp)) {
|
||||
ORTE_ERROR_LOG(ORTE_ERROR);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
}
|
||||
if (PMI_SUCCESS != PMI_Get_rank(&rank)) {
|
||||
errno = 0;
|
||||
rank = (int)strtol(env, (char **)NULL, 10);
|
||||
if (0 != errno) {
|
||||
OPAL_OUTPUT_VERBOSE((0, orte_ess_base_output,
|
||||
"strtol error detected at %s:%d\n", __FILE__,
|
||||
__LINE__));
|
||||
ORTE_ERROR_LOG(ORTE_ERROR);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
@ -122,7 +122,9 @@ static bool pmi_startup(void)
|
||||
|
||||
int orte_grpcomm_pmi_component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
if (ORTE_PROC_IS_MPI &&
|
||||
/* only use PMI when direct launched */
|
||||
if (NULL == orte_process_info.my_hnp_uri &&
|
||||
ORTE_PROC_IS_MPI &&
|
||||
pmi_startup()) {
|
||||
/* if PMI is available, make it available for use by MPI procs */
|
||||
*priority = my_priority;
|
||||
|
81
orte/test/system/getenv_pmi.c
Обычный файл
81
orte/test/system/getenv_pmi.c
Обычный файл
@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "pmi.h"
|
||||
|
||||
/* NOTES
|
||||
*
|
||||
* useful debug environment variables:
|
||||
* PMI_DEBUG
|
||||
*/
|
||||
|
||||
int main(int argc, char **argv, char **envp)
|
||||
{
|
||||
int i;
|
||||
int pmi_rank = -1;
|
||||
int pmi_process_group_size = -1;
|
||||
int num_local_procs = 0;
|
||||
int *local_rank_ids = NULL;
|
||||
int spawned = PMI_FALSE;
|
||||
int rc = EXIT_FAILURE;
|
||||
pid_t pid = 0;
|
||||
char *err = NULL;
|
||||
PMI_BOOL pmi_initialized = PMI_FALSE;
|
||||
|
||||
/* sanity */
|
||||
if (PMI_SUCCESS != PMI_Initialized(&pmi_initialized) ||
|
||||
PMI_TRUE == pmi_initialized) {
|
||||
fprintf(stderr, "=== ERROR: PMI sanity failure\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
if (PMI_SUCCESS != PMI_Init(&spawned)) {
|
||||
err = "PMI_Init failure!";
|
||||
goto done;
|
||||
}
|
||||
if (PMI_SUCCESS != PMI_Get_size(&pmi_process_group_size)) {
|
||||
err = "PMI_Get_size failure!";
|
||||
goto done;
|
||||
}
|
||||
if (PMI_SUCCESS != PMI_Get_rank(&pmi_rank)) {
|
||||
err = "PMI_Get_rank failure!";
|
||||
goto done;
|
||||
}
|
||||
if (PMI_SUCCESS != PMI_Get_clique_size(&num_local_procs)) {
|
||||
err = "PMI_Get_clique_size failure!";
|
||||
goto done;
|
||||
}
|
||||
if (NULL == (local_rank_ids = calloc(num_local_procs, sizeof(int)))) {
|
||||
err = "out of resources";
|
||||
goto done;
|
||||
}
|
||||
if (PMI_SUCCESS != PMI_Get_clique_ranks(local_rank_ids, num_local_procs)) {
|
||||
err = "PMI_Get_clique_size failure!";
|
||||
goto done;
|
||||
}
|
||||
/* lowest local rank will print env info and tag its output*/
|
||||
if (pmi_rank == local_rank_ids[0]) {
|
||||
for (; NULL != envp && NULL != *envp; ++envp) {
|
||||
printf("===[%d]: %s\n", pmi_rank, *envp);
|
||||
}
|
||||
}
|
||||
|
||||
rc = EXIT_SUCCESS;
|
||||
|
||||
done:
|
||||
if (PMI_TRUE == pmi_initialized) {
|
||||
if (PMI_SUCCESS != PMI_Finalize()) {
|
||||
err = "PMI_Finalize failure!";
|
||||
}
|
||||
}
|
||||
if (NULL != err) {
|
||||
fprintf(stderr, "=== ERROR [rank:%d] %s\n", pmi_rank, err);
|
||||
}
|
||||
return rc;
|
||||
}
|
Загрузка…
x
Ссылка в новой задаче
Block a user