1
1

Clean up the problem of connecting to default universes.

This commit was SVN r12438.
Этот коммит содержится в:
Ralph Castain 2006-11-06 15:28:38 +00:00
родитель 9cf5b3709c
Коммит 194bdd413b
3 изменённых файлов: 100 добавлений и 34 удалений

Просмотреть файл

@ -204,11 +204,95 @@ int orte_universe_search(opal_list_t *universe_list)
return (opal_list_is_empty(universe_list) ? exit_status : ORTE_SUCCESS);
}
/*
 * Check whether a connection to the universe described by `uni` is
 * permitted and, if it is, ping the universe's seed contact URI.
 *
 * Returns:
 *   ORTE_ERR_NO_CONNECTION_ALLOWED - we are not connecting a console and the
 *                                    universe is either not persistent or has
 *                                    a scope that starts with "exclusive"
 *   ORTE_ERR_CONNECTION_FAILED     - the ping did not return ORTE_SUCCESS
 *   ORTE_SUCCESS                   - connection is allowed and the ping
 *                                    succeeded
 */
static int orte_universe_check_connect(orte_universe_t *uni)
{
    int ping_rc;

    /* the persistence/scope screening is skipped when connecting a console */
    if (!orte_universe_info.console) {
        /* only a persistent universe whose scope does not start with
         * "exclusive" may be joined
         *
         * still to do: also check "local" and that we did not specify the
         * exact matching universe name
         */
        if (!(uni->persistence &&
              0 != strncmp(uni->scope, "exclusive", strlen("exclusive")))) {
            if (orte_debug_flag) {
                opal_output(0, "connect_uni: connection not allowed");
            }
            /* NOTE: THIS IS NOT AN ERROR - DON'T ERROR_LOG IT */
            return ORTE_ERR_NO_CONNECTION_ALLOWED;
        }
    }

    if (orte_debug_flag) {
        opal_output(0, "connect_uni: contact info to set: %s", uni->seed_uri);
    }

    /* ping to verify it's alive */
    ping_rc = orte_rml.ping(uni->seed_uri, &ompi_rte_ping_wait);
    if (ORTE_SUCCESS == ping_rc) {
        return ORTE_SUCCESS;
    }

    /* a failed ping is only logged when debugging is enabled */
    if (orte_debug_flag) {
        ORTE_ERROR_LOG(ORTE_ERR_CONNECTION_FAILED);
    }
    return ORTE_ERR_CONNECTION_FAILED;
}
int orte_universe_exists(orte_universe_t *univ)
{
char *contact_file;
opal_list_t universes;
opal_list_item_t *item;
orte_universe_t *uniptr;
int ret;
/* if the user didn't provide a name for our universe, then we have to check
* for other universe names we could join. It is virtually impossible for
* another universe to have our exact default universe name as they would
* have to have the same PID - and that would be bad in so many ways!
*/
if (orte_universe_info.default_name) {
/* if we just have the default name - i.e., no name was specified -
* then get a list of all universes known on the local system. All
* we can do here is just loop through the session directory tree
* for universes - we have no better discovery mechanism at this time
*/
OBJ_CONSTRUCT(&universes, opal_list_t);
if (ORTE_SUCCESS != (ret = orte_universe_search(&universes))) {
/* if nothing was found, that's okay - report anything else */
if (ORTE_ERR_NOT_FOUND != ret) {
ORTE_ERROR_LOG(ret);
}
return ret;
}
/* if the list is empty, then we can just return */
if (opal_list_is_empty(&universes)) return ORTE_ERR_NOT_FOUND;
/* we have no real criteria for picking one over the other, so
* we just loop through the returned objects and pick the first
* one that will support connection
*/
while (NULL != (item = opal_list_remove_first(&universes))) {
uniptr = (orte_universe_t*)item;
if (ORTE_SUCCESS == orte_universe_check_connect(uniptr)) {
univ->name = strdup(uniptr->name);
univ->host = strdup(uniptr->host);
univ->uid = strdup(uniptr->uid);
univ->persistence = uniptr->persistence;
univ->scope = strdup(uniptr->scope);
univ->seed_uri = strdup(uniptr->seed_uri);
univ->console_connected = uniptr->console_connected;
return ORTE_SUCCESS;
}
}
/* if we get here, then we did not succeed in connecting to
* anyone - report that situation
*/
return ORTE_ERR_NOT_FOUND;
}
/* if the user did provide a name, then see if we can join it */
/* check to see if local universe session directory already exists */
if (ORTE_SUCCESS != orte_session_dir(false,
orte_process_info.tmpdir_base,
@ -242,32 +326,5 @@ int orte_universe_exists(orte_universe_t *univ)
opal_output(0, "connect_uni: contact info read");
}
if (!orte_universe_info.console) { /* if we aren't trying to connect a console */
if (!univ->persistence || /* if the target universe is not persistent... */
(0 == strncmp(univ->scope, "exclusive", strlen("exclusive")))) { /* ...or no connection allowed */
/* also need to check "local" and that we did not specify the exact
* matching universe name
*/
if (orte_debug_flag) {
opal_output(0, "connect_uni: connection not allowed");
}
/* NOTE: THIS IS NOT AN ERROR - DON'T ERROR_LOG IT */
return ORTE_ERR_NO_CONNECTION_ALLOWED;
}
}
if (orte_debug_flag) {
opal_output(0, "connect_uni: contact info to set: %s", univ->seed_uri);
}
/* if persistent, ping to verify it's alive */
if (ORTE_SUCCESS != orte_rml.ping(univ->seed_uri, &ompi_rte_ping_wait)) {
if (orte_debug_flag) {
ORTE_ERROR_LOG(ORTE_ERR_CONNECTION_FAILED);
}
return ORTE_ERR_CONNECTION_FAILED;
}
return ORTE_SUCCESS;
return orte_universe_check_connect(univ);
}

Просмотреть файл

@ -59,12 +59,13 @@ void orte_universe_construct(orte_universe_t *obj) {
obj->console = false;
obj->console_connected = false;
obj->name = NULL;
obj->host = NULL;
obj->uid = NULL;
obj->scope = NULL;
obj->seed_uri = NULL;
obj->scriptfile = NULL;
obj->name = NULL;
obj->default_name = false;
obj->host = NULL;
obj->uid = NULL;
obj->scope = NULL;
obj->seed_uri = NULL;
obj->scriptfile = NULL;
}
void orte_universe_destruct( orte_universe_t *obj) {
@ -146,6 +147,10 @@ int orte_univ_info(void)
/* now copy the universe name into the universe_info structure */
orte_universe_info.name = strdup(tptr);
/* indicate that the universe name was provided */
orte_universe_info.default_name = false;
} else {
/* if nothing was provided, then initialize the user and nodename
* to the local values
@ -154,6 +159,9 @@ int orte_univ_info(void)
orte_universe_info.host = strdup(orte_system_info.nodename);
/* and the universe name to default-universe-PID */
asprintf(&orte_universe_info.name, "%s-%d", ORTE_DEFAULT_UNIVERSE, getpid());
/* indicate that the universe name is a default one */
orte_universe_info.default_name = true;
}
id = mca_base_param_register_int("universe", "persistence", NULL, NULL, orte_universe_info.persistence);

Просмотреть файл

@ -60,6 +60,7 @@ struct orte_universe_t {
opal_list_item_t super;
orte_universe_state_t state; /**< Indicates state of the universe */
char *name;
bool default_name; /**< Indicates that universe name was not provided */
char *host;
char *uid;
bool persistence;