2005-03-14 23:57:21 +03:00
|
|
|
/*
|
2005-11-05 22:57:48 +03:00
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2005-03-14 23:57:21 +03:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 15:43:37 +03:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2005-03-14 23:57:21 +03:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @file
|
|
|
|
*
|
|
|
|
* Discover existing Open MPI Run Time Environment universes and check whether one can be contacted
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
2005-05-05 23:14:51 +04:00
|
|
|
#include "orte_config.h"
|
2005-03-14 23:57:21 +03:00
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
#ifdef HAVE_SYS_TIME_H
|
|
|
|
#include <sys/time.h>
|
|
|
|
#endif
|
2006-06-29 01:03:31 +04:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <dirent.h>
|
|
|
|
#include <libgen.h>
|
2005-03-14 23:57:21 +03:00
|
|
|
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "orte/orte_constants.h"
|
2005-07-04 03:31:27 +04:00
|
|
|
#include "opal/util/output.h"
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "orte/util/univ_info.h"
|
|
|
|
#include "orte/util/sys_info.h"
|
|
|
|
#include "orte/util/proc_info.h"
|
2005-07-04 05:59:52 +04:00
|
|
|
#include "opal/util/os_path.h"
|
2006-07-04 02:23:07 +04:00
|
|
|
#include "opal/util/os_dirpath.h"
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "orte/util/session_dir.h"
|
|
|
|
#include "orte/util/universe_setup_file_io.h"
|
2005-03-14 23:57:21 +03:00
|
|
|
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "orte/mca/rml/rml.h"
|
|
|
|
#include "orte/mca/ns/ns.h"
|
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
2005-03-14 23:57:21 +03:00
|
|
|
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "orte/runtime/runtime.h"
|
2005-03-14 23:57:21 +03:00
|
|
|
|
|
|
|
|
|
|
|
/* Timeout handed to orte_rml.ping() by orte_universe_exists() when it probes
 * a universe. Presumably 2 seconds + 0 microseconds, assuming the usual
 * tv_sec/tv_usec member order of struct timeval. */
static struct timeval ompi_rte_ping_wait = {2, 0};
|
|
|
|
|
2006-06-29 01:03:31 +04:00
|
|
|
/**
 * Build a list of the universes found under the local session directory.
 *
 * Asks orte_session_dir_get_name() for a session directory path built with a
 * placeholder universe name, strips that last path component, and walks the
 * resulting directory. For every entry whose name does not start with '.',
 * "<dir>/<entry>/universe-setup.txt" is read into a newly created
 * orte_universe_t, which is then appended to universe_list.
 *
 * @param universe_list  List that receives one orte_universe_t per universe
 *                       found. Items appended before a failure stay on it.
 *
 * @return ORTE_SUCCESS when every entry was read; otherwise the error code
 *         returned by the failing call, or ORTE_ERROR when the directory
 *         cannot be opened or a path string cannot be allocated.
 */
int orte_universe_search(opal_list_t *universe_list) {
    int ret, exit_status = ORTE_SUCCESS;
    DIR *cur_dirp = NULL;
    struct dirent *dir_entry;
    char *univ_setup_filename = NULL;
    char *fulldirpath = NULL;   /* heap string owned by this function */
    char *session_root = NULL;  /* private copy of dirname(fulldirpath) */
    char *prefix = NULL;
    char *frontend = NULL;

    /*
     * Get the session directory
     *
     * NOTE(review): the strdup("dummy") result is never freed here; confirm
     * whether orte_session_dir_get_name() takes ownership of it.
     */
    if( ORTE_SUCCESS != (ret = orte_session_dir_get_name(&fulldirpath,
                                                         &prefix,
                                                         &frontend,
                                                         orte_system_info.user,
                                                         orte_system_info.nodename,
                                                         NULL, /* batch ID -- Not used */
                                                         strdup("dummy"), /* Universe Name -- stripped below */
                                                         NULL, /* jobid */
                                                         NULL  /* vpid */
                                                         ) ) ) {
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Strip off the dummy universe name. dirname() may modify its argument
     * and may return a pointer to static storage, so the heap pointer stays
     * in fulldirpath (for free()) and all further work uses a private copy
     * of the result.
     */
    session_root = strdup(dirname(fulldirpath));
    if( NULL == session_root ) {
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

    /*
     * Check to make sure we have access to this directory
     */
    if( ORTE_SUCCESS != (ret = opal_os_dirpath_access(session_root, 0) )) {
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Open up the base directory so we can get a listing
     */
    if( NULL == (cur_dirp = opendir(session_root)) ) {
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

    /*
     * For each directory/universe
     */
    while( NULL != (dir_entry = readdir(cur_dirp)) ) {
        orte_universe_t *univ = NULL;

        /*
         * Skip non-universe directories: ".", ".." and any other entry
         * whose name starts with a dot.
         */
        if( '.' == dir_entry->d_name[0] ) {
            continue;
        }

        /*
         * Read the setup file
         */
        if( 0 > asprintf(&univ_setup_filename, "%s/%s/%s",
                         session_root,
                         dir_entry->d_name,
                         "universe-setup.txt") ) {
            /* The pointer is not usable after a failed asprintf() */
            univ_setup_filename = NULL;
            exit_status = ORTE_ERROR;
            goto cleanup;
        }

        univ = OBJ_NEW(orte_universe_t);
        OBJ_RETAIN(univ);
        if( ORTE_SUCCESS != (ret = orte_read_universe_setup_file(univ_setup_filename, univ) ) ) {
            printf("orte_ps: Unable to read the file (%s)\n", univ_setup_filename);
            /*
             * NOTE(review): univ is not released on this path, and the
             * OBJ_RETAIN above looks like it leaves an extra reference on
             * every object -- confirm the intended ownership with the
             * callers before changing either.
             */
            exit_status = ret;
            goto cleanup;
        }

        opal_list_append(universe_list, &(univ->super));

        /* The path is rebuilt for every entry; drop this one now */
        free(univ_setup_filename);
        univ_setup_filename = NULL;
    }

 cleanup:
    if( NULL != cur_dirp ) {
        closedir(cur_dirp);
    }
    free(univ_setup_filename);
    free(session_root);
    free(fulldirpath);
    free(prefix);
    free(frontend);

    return exit_status;
}
|
2005-03-14 23:57:21 +03:00
|
|
|
|
2005-05-05 23:14:51 +04:00
|
|
|
/**
 * Check whether the universe named in orte_universe_info exists locally and
 * can be contacted.
 *
 * Steps, in order:
 *  1. orte_session_dir() is called with its first argument false
 *     (presumably "do not create" -- confirm against session_dir.h) to see
 *     whether the universe session directory is there.
 *  2. "universe-setup.txt" inside orte_process_info.universe_session_dir is
 *     read into univ.
 *  3. Unless a console connection is requested, a universe that is not
 *     persistent or whose scope begins with "exclusive" is refused.
 *  4. univ->seed_uri is pinged using ompi_rte_ping_wait as the timeout.
 *
 * @param univ  Filled in by orte_read_universe_setup_file().
 *
 * @return ORTE_SUCCESS if the ping succeeds;
 *         ORTE_ERR_NOT_FOUND if the session directory check fails or the
 *         setup file path cannot be built;
 *         the code from orte_read_universe_setup_file() if the read fails;
 *         ORTE_ERR_NO_CONNECTION_ALLOWED if step 3 refuses the connection;
 *         ORTE_ERR_CONNECTION_FAILED if the ping fails.
 */
int orte_universe_exists(orte_universe_t *univ)
{
    char *setup_path;
    int rc;

    /* Does the local universe session directory already exist?
     * NOTE: a missing directory is NOT an error - don't ERROR_LOG it. */
    if (ORTE_SUCCESS != orte_session_dir(false,
                                         orte_process_info.tmpdir_base,
                                         orte_system_info.user,
                                         orte_system_info.nodename,
                                         NULL,
                                         orte_universe_info.name,
                                         NULL,
                                         NULL)) {
        return ORTE_ERR_NOT_FOUND;
    }

    /* Build the path of the universe setup ("contact-info") file.
     * NOTE: failing to get one is NOT an error - don't ERROR_LOG it. */
    setup_path = opal_os_path(false, orte_process_info.universe_session_dir,
                              "universe-setup.txt", NULL);
    if (NULL == setup_path) {
        return ORTE_ERR_NOT_FOUND;
    }

    /* Read the file; the path string is not needed afterwards, whatever
     * the outcome, so it is released right away. */
    rc = orte_read_universe_setup_file(setup_path, univ);
    free(setup_path);
    if (ORTE_SUCCESS != rc) {
        /* NOTE: THIS IS NOT AN ERROR - DON'T ERROR_LOG IT */
        return rc;
    }

    if (orte_debug_flag) {
        opal_output(0, "connect_uni: contact info read");
    }

    /* A console may always try to connect. Anything else is turned away
     * when the target universe is not persistent or is marked exclusive.
     * (Still to do: check "local" scope and an exactly matching universe
     * name.) */
    if (!orte_universe_info.console &&
        (!univ->persistence ||
         0 == strncmp(univ->scope, "exclusive", strlen("exclusive")))) {
        if (orte_debug_flag) {
            opal_output(0, "connect_uni: connection not allowed");
        }
        /* NOTE: THIS IS NOT AN ERROR - DON'T ERROR_LOG IT */
        return ORTE_ERR_NO_CONNECTION_ALLOWED;
    }

    if (orte_debug_flag) {
        opal_output(0, "connect_uni: contact info to set: %s", univ->seed_uri);
    }

    /* Ping the recorded contact URI to verify the universe is alive */
    if (ORTE_SUCCESS == orte_rml.ping(univ->seed_uri, &ompi_rte_ping_wait)) {
        return ORTE_SUCCESS;
    }

    /* The failure is only logged when debugging is enabled */
    if (orte_debug_flag) {
        ORTE_ERROR_LOG(ORTE_ERR_CONNECTION_FAILED);
    }
    return ORTE_ERR_CONNECTION_FAILED;
}
|