1
1

Revise the universe connection logic. Two cases are now handled:

1. The user does NOT specify a universe name (the default universe case). If we detect an existing default universe and cannot connect to it, we quietly create an alternative name by appending the pid to the ORTE_DEFAULT_UNIVERSE name (i.e., "default-universe-<pid>") and move on. We no longer print a warning message in this case.

2. The user specifies a universe name. If we detect an existing universe of that name and cannot connect to it, we consider this an error condition, return ORTE_ERR_UNREACH, and abort.

This commit was SVN r7131.
Этот коммит содержится в:
Ralph Castain 2005-09-01 15:50:38 +00:00
родитель 636ab23fdb
Коммит d0f7dafc47
3 изменённых файлов: 62 добавлений и 50 удалений

Просмотреть файл

@ -3,14 +3,14 @@
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
@ -23,11 +23,16 @@
/*
* This macro and array are used to output intelligible error
* messages.
* messages.
*/
#define ORTE_ERROR_NAME(n) opal_strerror(n)
/*
* Standard names used across the system
*/
#define ORTE_DEFAULT_UNIVERSE "default-universe"
/*
* ORTE SEGMENT NAMES
* There are some predefined segments that are used across the entire ORTE system.

Просмотреть файл

@ -3,14 +3,14 @@
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
@ -79,21 +79,27 @@ orte_sds_base_basic_contact_universe(void)
}
if (ORTE_ERR_NOT_FOUND != ret) {
/* if it exists but no contact could be established,
* define unique name based on current one.
* and start new universe with me as seed
* we first check to see if this was a default universe
* or one that the user specifically requested. If
* it's the default, then we quietly generate a unique
* new name and start a new universe behind the scenes.
* If it was not the default (i.e., the user specifically
* directed us to a named universe), then we return an
* error code and abort.
*/
universe = strdup(orte_universe_info.name);
free(orte_universe_info.name);
orte_universe_info.name = NULL;
pid = getpid();
if (0 > asprintf(&orte_universe_info.name, "%s-%d", universe, (int)pid)) {
opal_output(0, "orte_init: failed to create unique universe name");
return ret;
if (0 == strcmp(ORTE_DEFAULT_UNIVERSE, orte_universe_info.name)) {
/* default universe - generate unique name and proceed */
universe = strdup(orte_universe_info.name);
free(orte_universe_info.name);
orte_universe_info.name = NULL;
pid = getpid();
if (0 > asprintf(&orte_universe_info.name, "%s-%d", universe, (int)pid)) {
opal_output(0, "orte_init: failed to create unique universe name");
return ret;
}
} else { /* user-specified name - abort */
return ORTE_ERR_UNREACH;
}
opal_output(0, "Could not join a running, existing universe");
opal_output(0, "Establishing a new one named: %s",
orte_universe_info.name);
}
orte_process_info.seed = true;
/* since we are seed, ensure that all replica info is NULL'd */
@ -105,7 +111,7 @@ orte_sds_base_basic_contact_universe(void)
free(orte_process_info.ns_replica);
orte_process_info.ns_replica = NULL;
}
if (NULL != orte_process_info.gpr_replica_uri) {
free(orte_process_info.gpr_replica_uri);
orte_process_info.gpr_replica_uri = NULL;
@ -125,7 +131,7 @@ int
orte_sds_base_seed_set_name(void)
{
int id, flag, rc;
/* if we're a seed and we're not infrastructure, we're also a
singleton. So set the singleton flag in that case */
id = mca_base_param_find("orte", NULL, "infrastructure");
@ -142,6 +148,6 @@ orte_sds_base_seed_set_name(void)
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -3,14 +3,14 @@
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
#include "orte_config.h"
@ -29,15 +29,16 @@
#include <sys/stat.h>
#include "include/orte_constants.h"
#include "mca/base/base.h"
#include "mca/base/mca_base_param.h"
#include "mca/ns/ns_types.h"
#include "orte/include/orte_constants.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/schema/schema_types.h"
#include "opal/util/output.h"
#include "util/proc_info.h"
#include "util/sys_info.h"
#include "orte/util/proc_info.h"
#include "orte/util/sys_info.h"
#include "util/univ_info.h"
#include "orte/util/univ_info.h"
orte_universe_t orte_universe_info = {
/* .init = */ false,
@ -57,12 +58,12 @@ int orte_univ_info(void)
{
int id, tmp;
char *tmpname=NULL, *tptr, *ptr;
if (!orte_universe_info.init) {
id = mca_base_param_register_string("universe", NULL, NULL, NULL, NULL);
mca_base_param_lookup_string(id, &tmpname);
if (NULL != tmpname) {
if (NULL != tmpname) {
/* Universe name info is passed as userid@hostname:univ_name */
/* extract the userid from the universe option, if provided */
tptr = tmpname;
@ -77,7 +78,7 @@ int orte_univ_info(void)
}
orte_universe_info.uid = strdup(orte_system_info.user);
}
/* extract the hostname, if provided */
if (NULL != (ptr = strchr(tptr, ':'))) {
*ptr = '\0';
@ -87,7 +88,7 @@ int orte_univ_info(void)
} else {
orte_universe_info.host = strdup(orte_system_info.nodename);
}
/* now copy the universe name into the universe_info structure */
orte_universe_info.name = strdup(tptr);
} else {
@ -97,30 +98,30 @@ int orte_univ_info(void)
orte_universe_info.uid = strdup(orte_system_info.user);
orte_universe_info.host = strdup(orte_system_info.nodename);
/* and the universe name to default-universe */
orte_universe_info.name = strdup("default-universe");
orte_universe_info.name = strdup(ORTE_DEFAULT_UNIVERSE);
}
id = mca_base_param_register_int("universe", "persistence", NULL, NULL, orte_universe_info.persistence);
mca_base_param_lookup_int(id, &tmp);
orte_universe_info.persistence = (tmp ? true : false);
id = mca_base_param_register_string("universe", "scope", NULL, NULL, orte_universe_info.scope);
mca_base_param_lookup_string(id, &(orte_universe_info.scope));
id = mca_base_param_register_int("universe", "console", NULL, NULL, orte_universe_info.console);
mca_base_param_lookup_int(id, &tmp);
orte_universe_info.console = (tmp ? true : false);
id = mca_base_param_register_string("universe", "uri", NULL, NULL, orte_universe_info.seed_uri);
mca_base_param_lookup_string(id, &(orte_universe_info.seed_uri));
/* console connected is set elsewhere */
id = mca_base_param_register_string("universe", "script", NULL, NULL, orte_universe_info.scriptfile);
mca_base_param_lookup_string(id, &(orte_universe_info.scriptfile));
orte_universe_info.init = true;
}
return(ORTE_SUCCESS);
}
@ -131,32 +132,32 @@ int orte_univ_info_finalize(void)
free(orte_universe_info.name);
orte_universe_info.name = NULL;
}
if (NULL != orte_universe_info.host) {
free(orte_universe_info.host);
orte_universe_info.host = NULL;
}
if (NULL != orte_universe_info.uid) {
free(orte_universe_info.uid);
orte_universe_info.uid = NULL;
}
if (NULL != orte_universe_info.scope) {
free(orte_universe_info.scope);
orte_universe_info.scope = NULL;
}
if (NULL != orte_universe_info.seed_uri) {
free(orte_universe_info.seed_uri);
orte_universe_info.seed_uri = NULL;
}
if (NULL != orte_universe_info.scriptfile) {
free(orte_universe_info.scriptfile);
orte_universe_info.scriptfile = NULL;
}
orte_universe_info.init = false;
orte_universe_info.persistence = false;
orte_universe_info.console = false;