1
1

Bringing over the session directory and universe changes

from the tmp/jjhursey-ft-cr branch.

In this commit we change the way universe names are created.
Previously, by default, we first created "default-universe"; then
if there was a conflict we created "default-universe-PID"
where PID is the PID of the HNP.
Now we create "default-universe-PID" all the time (when
a default universe name is used). This makes it much 
easier when trying to find a HNP from an outside app 
(e.g. orte-ps, orteconsole, ...)

This also adds a "search" function to find all of the 
universes on the machine. This is useful in many contexts
when trying to find a persistent daemon or when trying to 
connect to a HNP.

This commit also makes orte_universe_t an opal_object_t, 
which is something that needed to happen, and only affected
the SDS in one of its base functions.


I was asked to bring this over to aid in fixing orteconsole
and orteprobe. Due to the change of orte_universe_t to 
an object orteprobe may need to be updated to reflect this 
change. Since orteprobe needs to be looked at anyway I'll
leave this to Ralph to take care of.

*Note*:
These changes do not depend upon any of the FT work (but
the FT work does depend upon them). These were brought over
to help in fixing some of the ORTE tool set that require
the functionality laid out in this patch.

Testing:
Ran the 'ibm' tests before and after this change, and all was
as well as before the change. If anyone notices additional
irregularities in the system let me know. But none are expected.

This commit was SVN r10550.
Этот коммит содержится в:
Josh Hursey 2006-06-28 21:03:31 +00:00
родитель 43b7b17033
Коммит 0a931f9fad
8 изменённых файлов: 591 добавлений и 282 удалений

Просмотреть файл

@ -40,8 +40,8 @@ orte_sds_base_basic_contact_universe(void)
{
int ret, rc, exit_if_not_exist;
orte_universe_t univ;
char *universe;
pid_t pid;
OBJ_CONSTRUCT(&univ, orte_universe_t);
/* if we were NOT given registry and name service replicas (i.e., we
* weren't told a universe contact point), check for some
@ -49,15 +49,19 @@ orte_sds_base_basic_contact_universe(void)
if (NULL == orte_process_info.ns_replica_uri || NULL == orte_process_info.gpr_replica_uri) {
if (ORTE_SUCCESS == (ret = orte_universe_exists(&univ))) {
/* copy universe info into our universe structure */
orte_universe_info.name = univ.name;
orte_universe_info.host = univ.host;
orte_universe_info.uid = univ.uid;
orte_universe_info.name = strdup(univ.name);
orte_universe_info.host = strdup(univ.host);
orte_universe_info.uid = strdup(univ.uid);
orte_universe_info.persistence = univ.persistence;
orte_universe_info.scope = univ.scope;
orte_universe_info.console = univ.console;
orte_universe_info.seed_uri = univ.seed_uri;
orte_universe_info.scope = strdup(univ.scope);
/* JJH XXX This will inadvertently overwrite the console MCA param */
/* orte_universe_info.console = univ.console; JJH XXX */
orte_universe_info.seed_uri = strdup(univ.seed_uri);
orte_universe_info.console_connected = univ.console_connected;
orte_universe_info.scriptfile = univ.scriptfile;
if( NULL != univ.scriptfile)
orte_universe_info.scriptfile = strdup(univ.scriptfile);
else
orte_universe_info.scriptfile = NULL;
/* define the replica contact points */
orte_process_info.ns_replica_uri = strdup(univ.seed_uri);
orte_process_info.gpr_replica_uri = strdup(univ.seed_uri);
@ -80,33 +84,11 @@ orte_sds_base_basic_contact_universe(void)
return ORTE_ERR_UNREACH;
}
if (ORTE_ERR_NOT_FOUND != ret) {
/* if it exists but no contact could be established,
* we first check to see if this was a default universe
* or one that the user specifically requested. If
* it's the default, then we quietly generate a unique
* new name and start a new universe behind the scenes.
* If it was not the default (i.e., the user specifically
* directed us to a named universe), then we return an
* error code and abort.
*/
if (0 == strcmp(ORTE_DEFAULT_UNIVERSE, orte_universe_info.name)) {
/* default universe - generate unique name and proceed */
universe = strdup(orte_universe_info.name);
free(orte_universe_info.name);
orte_universe_info.name = NULL;
pid = getpid();
if (0 > asprintf(&orte_universe_info.name, "%s-%d", universe, (int)pid)) {
opal_output(0, "orte_init: failed to create unique universe name");
free(universe);
return ret;
}
free(universe);
} else { /* user-specified name - abort */
/* user-specified name - abort */
opal_output(0, "orte_init: could not contact the specified universe name %s",
orte_universe_info.name);
return ORTE_ERR_UNREACH;
}
}
orte_process_info.seed = true;
/* since we are seed, ensure that all replica info is NULL'd */
if (NULL != orte_process_info.ns_replica_uri) {
@ -129,6 +111,8 @@ orte_sds_base_basic_contact_universe(void)
}
}
OBJ_DESTRUCT(&univ);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -29,6 +29,9 @@
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include <sys/types.h>
#include <dirent.h>
#include <libgen.h>
#include "orte/orte_constants.h"
#include "opal/util/output.h"
@ -48,6 +51,103 @@
static struct timeval ompi_rte_ping_wait = {2, 0};
/**
 * Collect every universe that has a session directory on this node.
 *
 * The session directory name is built for a placeholder universe, the
 * placeholder component is stripped to obtain the parent directory, and
 * each sub-directory found there is expected to hold a
 * "universe-setup.txt" file describing one universe.
 *
 * @param universe_list  Caller-constructed list. One orte_universe_t is
 *                       appended per universe found. The list is not
 *                       initialized or emptied here.
 *
 * @retval ORTE_SUCCESS  Every candidate directory was read.
 * @retval ORTE_ERROR    Bad argument, allocation failure, or the base
 *                       directory could not be opened.
 * @retval other         Error code propagated from the session-directory
 *                       helpers or from orte_read_universe_setup_file().
 *
 * On failure, entries appended before the failing directory remain on
 * the list; the caller still owns them.
 */
int orte_universe_search(opal_list_t *universe_list) {
    int ret, exit_status = ORTE_SUCCESS;
    DIR *cur_dirp = NULL;
    struct dirent *dir_entry = NULL;
    orte_universe_t *univ = NULL;
    char *univ_setup_filename = NULL;
    char *fulldirpath = NULL;
    char *basedirpath = NULL;
    char *prefix = NULL;
    char *frontend = NULL;
    /* Placeholder universe name; it is stripped off again below. A local
     * array is used because the callee copies the name, so handing it a
     * fresh strdup() would leak. */
    char placeholder_univ[] = "dummy";

    if (NULL == universe_list) {
        return ORTE_ERROR;
    }

    /*
     * Get the session directory
     */
    if (ORTE_SUCCESS != (ret = orte_session_dir_get_name(&fulldirpath,
                                                         &prefix,
                                                         &frontend,
                                                         orte_system_info.user,
                                                         orte_system_info.nodename,
                                                         NULL,             /* batch ID -- not used */
                                                         placeholder_univ, /* universe name -- stripped below */
                                                         NULL,             /* jobid */
                                                         NULL              /* vpid */
                                                         ))) {
        exit_status = ret;
        goto cleanup;
    }
    if (NULL == fulldirpath) {
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

    /*
     * Strip off the placeholder universe name. dirname() may modify its
     * argument and may return a pointer to static storage, so keep the
     * original allocation for free() and work on a private copy.
     */
    basedirpath = strdup(dirname(fulldirpath));
    if (NULL == basedirpath) {
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

    /*
     * Check to make sure we have access to this directory
     */
    if (ORTE_SUCCESS != (ret = orte_session_dir_check_dir(basedirpath))) {
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Open up the base directory so we can get a listing
     */
    if (NULL == (cur_dirp = opendir(basedirpath))) {
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

    /*
     * For each directory/universe
     */
    while (NULL != (dir_entry = readdir(cur_dirp))) {
        /*
         * Skip non-universe directories: ".", ".." and any other entry
         * whose name starts with a dot.
         */
        if ('.' == dir_entry->d_name[0]) {
            continue;
        }

        /*
         * Read the setup file
         */
        if (0 > asprintf(&univ_setup_filename, "%s/%s/%s",
                         basedirpath,
                         dir_entry->d_name,
                         "universe-setup.txt")) {
            /* buffer contents are undefined when asprintf() fails */
            univ_setup_filename = NULL;
            exit_status = ORTE_ERROR;
            goto cleanup;
        }

        univ = OBJ_NEW(orte_universe_t);
        if (NULL == univ) {
            exit_status = ORTE_ERROR;
            goto cleanup;
        }

        if (ORTE_SUCCESS != (ret = orte_read_universe_setup_file(univ_setup_filename, univ))) {
            printf("orte_ps: Unable to read the file (%s)\n", univ_setup_filename);
            exit_status = ret;
            goto cleanup;   /* univ is released in cleanup */
        }

        /* NOTE(review): the extra retain is kept so that entries handed
         * to the caller carry the same reference count as before; it
         * looks redundant next to OBJ_NEW -- confirm against callers. */
        OBJ_RETAIN(univ);
        opal_list_append(universe_list, &(univ->super));
        univ = NULL;        /* ownership moved to the list */

        free(univ_setup_filename);
        univ_setup_filename = NULL;
    }

 cleanup:
    if (NULL != univ) {
        OBJ_RELEASE(univ);
    }
    if (NULL != cur_dirp) {
        closedir(cur_dirp);
    }
    free(univ_setup_filename);
    free(basedirpath);
    free(fulldirpath);
    free(prefix);
    free(frontend);

    return exit_status;
}
int orte_universe_exists(orte_universe_t *univ)
{

Просмотреть файл

@ -135,6 +135,17 @@ OMPI_DECLSPEC int orte_monitor_procs_registered(void);
OMPI_DECLSPEC int orte_monitor_procs_unregistered(void);
/**
* Obtain a listing of all the universes on the machine
*
* @param univ_list An opal_list_t is returned to the user.
* This is not initialized in the function; the caller retains
* the responsibility for this variable.
* @retval ORTE_SUCCESS Upon successful search.
* @retval ORTE_ERROR Upon unsuccessful search.
*/
OMPI_DECLSPEC int orte_universe_search(opal_list_t *universe_list);
/**
* Check for universe existence
*

Просмотреть файл

@ -58,7 +58,10 @@
#include "orte/util/session_dir.h"
static int orte_check_dir(bool create, char *directory);
/*******************************
* Local function Declarations
*******************************/
static int orte_create_dir(char *directory);
static void orte_dir_empty(char *pathname);
static void orte_dir_empty_all(char *pathname);
@ -73,7 +76,43 @@ static bool orte_is_empty(char *pathname);
#define OMPI_PRINTF_FIX_STRING(a) ((NULL == a) ? "(null)" : a)
static int orte_check_dir(bool create, char *directory)
/****************************
* Functionality
****************************/
/*
 * Make sure the requested directory exists, creating it when it is missing.
 *
 * The directory is first probed with orte_session_dir_check_dir():
 *   - ORTE_ERR_NOT_FOUND: the path is created with owner
 *     read/write/execute permissions and the result of
 *     opal_os_create_dirpath() is returned;
 *   - anything else (the directory is usable, or it exists but cannot
 *     be accessed): the probe result is returned unchanged.
 */
static int orte_create_dir(char *directory)
{
#ifndef __WINDOWS__
    /* at the least, the owner must be able to do anything */
    const mode_t dir_mode = S_IRWXU;
#else
    const mode_t dir_mode = _S_IREAD | _S_IWRITE | _S_IEXEC;
#endif
    const int status = orte_session_dir_check_dir(directory);

    if (ORTE_ERR_NOT_FOUND == status) {
        /* Nothing there yet, so build the whole path */
        return opal_os_create_dirpath(directory, dir_mode);
    }

    /* Already present and usable, or present but inaccessible */
    return status;
}
/*
* Check that the directory:
* - exists
* - if exists, then we have permission to interact with it
*/
int orte_session_dir_check_dir(char *directory)
{
#ifndef __WINDOWS__
struct stat buf;
@ -91,221 +130,368 @@ static int orte_check_dir(bool create, char *directory)
if ((buf.st_mode & my_mode) == my_mode) { /* okay, I can work here */
return(ORTE_SUCCESS);
}
else {
/* Don't have access rights to the existing directory */
return(ORTE_ERROR);
}
}
if (create) {
return(opal_os_create_dirpath(directory, my_mode)); /* try to create it with proper mode */
else {
/* We could not find the directory */
return( ORTE_ERR_NOT_FOUND );
}
return(ORTE_ERROR); /* couldn't find it, or don't have access rights, and not asked to create it */
}
int orte_session_dir(bool create, char *prfx, char *usr, char *hostid,
char *batchid, char *univ, char *job, char *proc)
{
char *fulldirpath=NULL, *tmp=NULL, *hostname=NULL, *batchname=NULL;
char *sessions=NULL, *frontend=NULL, *user=NULL, *universe=NULL;
char *prefix=NULL, *sav=NULL;
int return_code;
/* ensure that system info is set */
/*
* Construct the fullpath to the session directory
*/
int
orte_session_dir_get_name(char **fulldirpath,
char **prefix, /* This will come back as the valid tmp dir */
char **frontend,
char *usr, char *hostid,
char *batchid, char *univ,
char *job, char *proc) {
char *hostname = NULL,
*batchname = NULL,
*sessions = NULL,
*user = NULL,
*universe = NULL;
int exit_status = ORTE_SUCCESS;
/* Ensure that system info is set */
orte_sys_info();
if (NULL == usr) { /* check if user set elsewhere */
if (NULL == orte_system_info.user) { /* error condition */
return ORTE_ERROR;
} else {
user = strdup(orte_system_info.user);
}
} else {
user = strdup(usr);
/*
* set the 'user' value
*/
if( NULL != usr) { /* User specified version */
user = strdup(usr);
}
else { /* check if it is set elsewhere */
if( NULL != orte_system_info.user)
user = strdup(orte_system_info.user);
else {
/* Couldn't find it, so fail */
exit_status = ORTE_ERROR;
goto cleanup;
}
}
if (NULL == univ) { /* see if universe set elsewhere */
if (NULL == orte_universe_info.name) { /* error condition */
return ORTE_ERROR;
} else {
universe = strdup(orte_universe_info.name);
}
} else {
universe = strdup(univ);
/*
* set the 'hostname'
*/
if( NULL != hostid) { /* User specified version */
hostname = strdup(hostid);
}
if (NULL == job && NULL != proc) { /* can't give a proc without a job */
return ORTE_ERROR;
}
if (NULL == hostid) { /* check if hostname set elsewhere */
if (NULL == orte_system_info.nodename) { /* don't have a hostname anywhere - error */
return_code = ORTE_ERROR;
goto CLEANUP;
} else {
hostname = strdup(orte_system_info.nodename);
}
} else {
hostname = strdup(hostid);
}
if (NULL == batchid) {
batchname = strdup("0");
} else {
batchname = batchid;
}
if (NULL == orte_process_info.top_session_dir) {
if (0 > asprintf(&frontend, "openmpi-sessions-%s@%s_%s", user, hostname, batchname)) {
return_code = ORTE_ERROR;
goto CLEANUP;
}
} else {
frontend = strdup(orte_process_info.top_session_dir);
}
if (NULL != proc) {
if (0 > asprintf(&sessions, "%s%s%s%s%s%s%s", frontend,
orte_system_info.path_sep, universe,
orte_system_info.path_sep, job,
orte_system_info.path_sep, proc)) {
return_code = ORTE_ERROR;
goto CLEANUP;
}
} else if (NULL != job) {
if (0 > asprintf(&sessions, "%s%s%s%s%s", frontend,
orte_system_info.path_sep, universe,
orte_system_info.path_sep, job)) {
return_code = ORTE_ERROR;
goto CLEANUP;
}
} else {
if (0 > asprintf(&sessions, "%s%s%s", frontend, orte_system_info.path_sep, universe)) {
return_code = ORTE_ERROR;
goto CLEANUP;
}
}
if (NULL != prefix) { /* if a prefix is specified, start looking here */
tmp = strdup(prefix);
fulldirpath = strdup(opal_os_path(false, tmp, sessions, NULL)); /* make sure it's an absolute pathname */
if (ORTE_SUCCESS == orte_check_dir(create, fulldirpath)) { /* check for existence and access, or create it */
return_code = ORTE_SUCCESS;
goto COMPLETE;
}
}
/* didn't find it, so first clear fulldirpath and tmp */
if (NULL != fulldirpath) {
free(fulldirpath); fulldirpath = NULL;
}
if (NULL != tmp) {
free(tmp); tmp = NULL;
else { /* check if it is set elsewhere */
if( NULL != orte_system_info.nodename)
hostname = strdup(orte_system_info.nodename);
else {
/* Couldn't find it, so fail */
exit_status = ORTE_ERROR;
goto cleanup;
}
}
/* no prefix was specified, so check other options in order */
if (NULL != orte_process_info.tmpdir_base) { /* stored value previously */
tmp = strdup(orte_process_info.tmpdir_base);
fulldirpath = opal_os_path(false, tmp, sessions, NULL);
if (ORTE_SUCCESS == orte_check_dir(create, fulldirpath)) { /* check for existence and access, or create it */
return_code = ORTE_SUCCESS;
goto COMPLETE;
}
free(tmp); tmp = NULL;
free(fulldirpath); fulldirpath = NULL;
} else if (NULL != getenv("OMPI_PREFIX_ENV")) { /* we have prefix enviro var - try that next */
tmp = strdup(getenv("OMPI_PREFIX_ENV"));
fulldirpath = strdup(opal_os_path(false, tmp, sessions, NULL));
if (ORTE_SUCCESS == orte_check_dir(create, fulldirpath)) { /* check for existence and access, or create it */
return_code = ORTE_SUCCESS;
goto COMPLETE;
}
free(tmp); tmp = NULL;
free(fulldirpath); fulldirpath = NULL;
} else if (NULL != getenv("TMPDIR")) {
tmp = strdup(getenv("TMPDIR"));
fulldirpath = strdup(opal_os_path(false, tmp, sessions, NULL));
if (ORTE_SUCCESS == orte_check_dir(create, fulldirpath)) { /* check for existence and access, or create it */
return_code = ORTE_SUCCESS;
goto COMPLETE;
}
free(tmp); tmp = NULL;
free(fulldirpath); fulldirpath = NULL;
} else if (NULL != getenv("TMP")) {
tmp = strdup(getenv("TMP"));
fulldirpath = strdup(opal_os_path(false, tmp, sessions, NULL));
if (ORTE_SUCCESS == orte_check_dir(create, fulldirpath)) { /* check for existence and access, or create it */
return_code = ORTE_SUCCESS;
goto COMPLETE;
}
free(tmp); tmp = NULL;
free(fulldirpath); fulldirpath = NULL;
} else {
tmp = strdup(OMPI_DEFAULT_TMPDIR);
fulldirpath = opal_os_path(false, tmp, sessions, NULL);
if (ORTE_SUCCESS == orte_check_dir(create, fulldirpath)) { /* check for existence and access, or create it */
return_code = ORTE_SUCCESS;
goto COMPLETE;
}
free(tmp); tmp = NULL;
free(fulldirpath); fulldirpath = NULL;
}
/*
* set the 'batchid'
*/
if (NULL != batchid)
batchname = strdup(batchid);
else
batchname = strdup("0");
/* couldn't find anything - return error */
return_code = ORTE_ERROR;
goto CLEANUP;
/*
* set the 'universe'
*/
if( NULL != univ) { /* User specified version */
universe = strdup(univ);
}
else { /* check if it is set elsewhere */
if( NULL != orte_universe_info.name)
universe = strdup(orte_universe_info.name);
else {
/* Couldn't find it, so fail */
exit_status = ORTE_ERROR;
goto cleanup;
}
}
/*
* Check: Can't give a proc without a job
*/
if( NULL == job &&
NULL != proc) {
exit_status = ORTE_ERROR;
goto cleanup;
}
/*
* get the front part of the session directory
* Will look something like:
* openmpi-sessions-USERNAME@HOSTNAME_BATCHID
*/
if (NULL != orte_process_info.top_session_dir) {
*frontend = strdup(orte_process_info.top_session_dir);
}
else { /* If not set then construct it */
if (0 > asprintf(frontend, "openmpi-sessions-%s@%s_%s", user, hostname, batchname)) {
exit_status = ORTE_ERROR;
goto cleanup;
}
}
COMPLETE:
if (create) { /* if creating the dir tree, overwrite the fields */
/*
* Construct the session directory
*/
/* If we were given a 'proc' then we can construct it fully into:
* openmpi-sessions-USERNAME@HOSTNAME_BATCHID/UNIVERSE/JOBID/PROC
*/
if( NULL != proc) {
if (0 > asprintf(&sessions, "%s%s%s%s%s%s%s",
*frontend,
orte_system_info.path_sep, universe,
orte_system_info.path_sep, job,
orte_system_info.path_sep, proc)) {
exit_status = ORTE_ERROR;
goto cleanup;
}
}
/* If we were given a 'job' then we can construct it partially into:
* openmpi-sessions-USERNAME@HOSTNAME_BATCHID/UNIVERSE/JOBID
*/
else if(NULL != job) {
if (0 > asprintf(&sessions, "%s%s%s%s%s",
*frontend,
orte_system_info.path_sep, universe,
orte_system_info.path_sep, job)) {
exit_status = ORTE_ERROR;
goto cleanup;
}
}
/* If we were given neither then we can construct it partially into:
* openmpi-sessions-USERNAME@HOSTNAME_BATCHID/UNIVERSE
*/
else {
if (0 > asprintf(&sessions, "%s%s%s",
*frontend,
orte_system_info.path_sep, universe )) {
exit_status = ORTE_ERROR;
goto cleanup;
}
}
/*
* If the user specified an invalid prefix, or no prefix at all
* we need to keep looking
*/
if( NULL != *fulldirpath) {
free(*fulldirpath);
*fulldirpath = NULL;
}
if( NULL != *prefix) { /* use the user specified one, if available */
;
}
/* Try to find a proper alternative prefix */
else if (NULL != orte_process_info.tmpdir_base) { /* stored value */
*prefix = strdup(orte_process_info.tmpdir_base);
}
else if( NULL != getenv("OMPI_PREFIX_ENV") ) { /* OMPI Environment var */
*prefix = strdup(getenv("OMPI_PREFIX_ENV"));
}
else if( NULL != getenv("TMPDIR") ) { /* General Environment var */
*prefix = strdup(getenv("TMPDIR"));
}
else if( NULL != getenv("TMP") ) { /* Another general environment var */
*prefix = strdup(getenv("TMP"));
}
else { /* ow. just use the default tmp directory */
*prefix = strdup(OMPI_DEFAULT_TMPDIR);
}
/*
* Construct the absolute final path
*/
*fulldirpath = strdup(opal_os_path(false, *prefix, sessions, NULL));
cleanup:
if(NULL != hostname)
free(hostname);
if(NULL != batchname)
free(batchname);
if(NULL != sessions)
free(sessions);
if(NULL != user)
free(user);
if(NULL != universe)
free(universe);
return exit_status;
}
/*
* Construct the session directory and create it if necessary
*/
int orte_session_dir(bool create,
char *prefix, char *usr, char *hostid,
char *batchid, char *univ, char *job, char *proc)
{
char *fulldirpath = NULL,
*frontend = NULL,
*sav = NULL;
int return_code = ORTE_SUCCESS, rtn;
/* This indicates if the prefix was set, and so if it fails then we
* should try with the default prefixes.*/
bool dbl_check_prefix = false;
if( NULL != prefix)
dbl_check_prefix = true;
try_again:
/*
* If the first attempt at the path creation failed, try with a null
* prefix. unless the original prefix was null, then we fail.
*/
if(!dbl_check_prefix && /* an indicator that we are trying a second time */
NULL != prefix) {
free(prefix);
prefix = NULL;
}
/*
* Get the session directory full name
* First try it with the specified prefix.
*/
if( ORTE_SUCCESS != ( rtn = orte_session_dir_get_name(&fulldirpath,
&prefix,
&frontend,
usr, hostid,
batchid, univ, job,
proc) ) ) {
return_code = rtn;
/*
* If the first attempt at the path creation failed, try with a null
* prefix. unless the original prefix was null, then we fail :(
*/
if(dbl_check_prefix) {
dbl_check_prefix = false;
goto try_again;
}
else {
goto cleanup;
}
}
/*
* Now that we have the full path, go ahead and create it if necessary
*/
if( create ) {
if( ORTE_SUCCESS != (rtn = orte_create_dir(fulldirpath) ) ) {
return_code = rtn;
if(dbl_check_prefix) {
dbl_check_prefix = false;
goto try_again;
}
else {
goto cleanup;
}
}
}
/*
* if we are not creating, then just verify that the path is OK
*/
else {
if( ORTE_SUCCESS != (rtn = orte_session_dir_check_dir(fulldirpath) )) {
/* It is not valid so we give up and return an error */
return_code = rtn;
if(dbl_check_prefix) {
dbl_check_prefix = false;
goto try_again;
}
else {
goto cleanup;
}
}
}
return_code = ORTE_SUCCESS;
/*
* If we are creating the directory tree, the overwrite the
* global structure fields
*/
if (create) {
if (NULL != orte_process_info.tmpdir_base) {
free(orte_process_info.tmpdir_base);
orte_process_info.tmpdir_base = NULL;
}
if (NULL != orte_process_info.top_session_dir) {
free(orte_process_info.top_session_dir);
orte_process_info.top_session_dir = NULL;
}
}
if (NULL == orte_process_info.tmpdir_base) {
orte_process_info.tmpdir_base = strdup(tmp); /* fill in if empty */
}
/*
* Update some of the global structures if they are empty
*/
if (NULL == orte_process_info.tmpdir_base)
orte_process_info.tmpdir_base = strdup(prefix);
if (NULL == orte_process_info.top_session_dir) {
orte_process_info.top_session_dir = strdup(frontend);
}
if (NULL == orte_process_info.top_session_dir)
orte_process_info.top_session_dir = strdup(frontend);
/*
* Set the process session directory
*/
if (NULL != proc) {
if (create) { /* overwrite if creating */
if (NULL != orte_process_info.proc_session_dir) {
free(orte_process_info.proc_session_dir);
orte_process_info.proc_session_dir = NULL;
free(orte_process_info.proc_session_dir);
orte_process_info.proc_session_dir = NULL;
}
}
if (NULL == orte_process_info.proc_session_dir) {
orte_process_info.proc_session_dir = strdup(fulldirpath);
}
sav = strdup(fulldirpath);
free(fulldirpath);
fulldirpath = strdup(dirname(sav));
free(sav);
/* Strip off last part of directory structure */
sav = strdup(fulldirpath);
free(fulldirpath);
fulldirpath = strdup(dirname(sav));
free(sav);
sav = NULL;
}
/*
* Set the job session directory
*/
if (NULL != job) {
if (create) { /* overwrite if creating */
if (NULL != orte_process_info.job_session_dir) {
free(orte_process_info.job_session_dir);
orte_process_info.job_session_dir = NULL;
free(orte_process_info.job_session_dir);
orte_process_info.job_session_dir = NULL;
}
}
if (NULL == orte_process_info.job_session_dir) {
orte_process_info.job_session_dir = strdup(fulldirpath);
}
sav = strdup(fulldirpath);
free(fulldirpath);
fulldirpath = strdup(dirname(sav));
free(sav);
/* Strip off last part of directory structure */
sav = strdup(fulldirpath);
free(fulldirpath);
fulldirpath = strdup(dirname(sav));
free(sav);
sav = NULL;
}
/*
* Set the universe session directory
*/
if (create) { /* overwrite if creating */
if (NULL != orte_process_info.universe_session_dir) {
free(orte_process_info.universe_session_dir);
@ -329,39 +515,20 @@ int orte_session_dir(bool create, char *prfx, char *usr, char *hostid,
OMPI_PRINTF_FIX_STRING(orte_process_info.tmpdir_base));
}
CLEANUP:
if (tmp) {
free(tmp);
}
if (fulldirpath) {
cleanup:
if(NULL != fulldirpath)
free(fulldirpath);
}
if (frontend) {
free(frontend);
}
if (batchname) {
free(batchname);
}
if (hostname) {
free(hostname);
}
if (universe) {
free(universe);
}
if (sessions) {
free(sessions);
}
if (user) {
free(user);
}
if(NULL != frontend)
free(frontend);
if(NULL != sav)
free(sav);
return return_code;
}
/*
* A job has aborted - so force cleanup.
* A job has aborted - so force cleanup of the session directory
*/
int
orte_session_dir_cleanup(orte_jobid_t jobid)
{

Просмотреть файл

@ -129,6 +129,28 @@
OMPI_DECLSPEC int orte_session_dir(bool create, char *prefix, char *user, char *hostid,
char *batchid, char *universe, char *job, char *vpid);
/*
* Construct the session directory name from the input parameters.
* This function does no checking that the directory exists, or can be used
*/
OMPI_DECLSPEC int orte_session_dir_get_name(char **fulldirpath,
char **prfx,
char **frontend,
char *usr, char *hostid,
char *batchid, char *univ,
char *job, char *proc);
/*
* Check the session directory string passed
* to check if the session directory exists, and can be accessed
*
* @param directory 'fulldirpath' returned value from orte_session_dir_get_name()
* @retval ORTE_SUCCESS If the directory exists, and can be accessed
* @retval ORTE_ERR_NOT_FOUND If the directory does not exist
* @retval ORTE_ERROR If the directory cannot be accessed, but does exist
*/
OMPI_DECLSPEC int orte_session_dir_check_dir(char *directory);
/** The orte_session_dir_finalize() function performs a cleanup of the
* session directory tree. It first removes the session directory for

Просмотреть файл

@ -42,25 +42,78 @@
#include "orte/util/univ_info.h"
orte_universe_t orte_universe_info = {
/* .state = */ ORTE_UNIVERSE_STATE_PRE_INIT,
/* .name = */ NULL,
/* .host = */ NULL,
/* .uid = */ NULL,
/* .persistence = */ false,
/* .scope = */ NULL,
/* .console = */ false,
/* .seed_uri = */ NULL,
/* .console_connected = */ false,
/* .scriptfile = */ NULL,
};
static bool universe_info_has_been_created = false;
orte_universe_t orte_universe_info;
void orte_universe_construct(orte_universe_t *obj);
void orte_universe_destruct( orte_universe_t *obj);
OBJ_CLASS_INSTANCE(orte_universe_t,
opal_list_item_t,
orte_universe_construct,
orte_universe_destruct);
/*
 * Constructor for orte_universe_t: puts the object into the pre-init
 * state with every flag cleared and no strings attached.
 */
void orte_universe_construct(orte_universe_t *obj)
{
    /* String members: nothing has been allocated yet */
    obj->name       = NULL;
    obj->host       = NULL;
    obj->uid        = NULL;
    obj->scope      = NULL;
    obj->seed_uri   = NULL;
    obj->scriptfile = NULL;

    /* State and boolean flags */
    obj->state             = ORTE_UNIVERSE_STATE_PRE_INIT;
    obj->persistence       = false;
    obj->console           = false;
    obj->console_connected = false;
}
/*
 * Destructor for orte_universe_t: releases every string member and
 * returns the object to the same state the constructor leaves it in.
 */
void orte_universe_destruct(orte_universe_t *obj)
{
    /* free(NULL) is a no-op, so unset members need no guard */
    free(obj->name);
    obj->name = NULL;

    free(obj->host);
    obj->host = NULL;

    free(obj->uid);
    obj->uid = NULL;

    free(obj->scope);
    obj->scope = NULL;

    free(obj->seed_uri);
    obj->seed_uri = NULL;

    free(obj->scriptfile);
    obj->scriptfile = NULL;

    /* Back to the pre-init defaults */
    obj->state             = ORTE_UNIVERSE_STATE_PRE_INIT;
    obj->persistence       = false;
    obj->console           = false;
    obj->console_connected = false;
}
int orte_univ_info(void)
{
int id, tmp;
char *tmpname=NULL, *tptr, *ptr;
if(!universe_info_has_been_created) {
OBJ_CONSTRUCT(&orte_universe_info, orte_universe_t);
universe_info_has_been_created = true;
}
if (ORTE_UNIVERSE_STATE_PRE_INIT == orte_universe_info.state) {
id = mca_base_param_register_string("universe", NULL, NULL, NULL, NULL);
mca_base_param_lookup_string(id, &tmpname);
@ -99,8 +152,8 @@ int orte_univ_info(void)
*/
orte_universe_info.uid = strdup(orte_system_info.user);
orte_universe_info.host = strdup(orte_system_info.nodename);
/* and the universe name to default-universe */
orte_universe_info.name = strdup(ORTE_DEFAULT_UNIVERSE);
/* and the universe name to default-universe-PID */
asprintf(&orte_universe_info.name, "%s-%d", ORTE_DEFAULT_UNIVERSE, getpid());
}
id = mca_base_param_register_int("universe", "persistence", NULL, NULL, orte_universe_info.persistence);
@ -130,40 +183,7 @@ int orte_univ_info(void)
int orte_univ_info_finalize(void)
{
if (NULL != orte_universe_info.name) {
free(orte_universe_info.name);
orte_universe_info.name = NULL;
}
if (NULL != orte_universe_info.host) {
free(orte_universe_info.host);
orte_universe_info.host = NULL;
}
if (NULL != orte_universe_info.uid) {
free(orte_universe_info.uid);
orte_universe_info.uid = NULL;
}
if (NULL != orte_universe_info.scope) {
free(orte_universe_info.scope);
orte_universe_info.scope = NULL;
}
if (NULL != orte_universe_info.seed_uri) {
free(orte_universe_info.seed_uri);
orte_universe_info.seed_uri = NULL;
}
if (NULL != orte_universe_info.scriptfile) {
free(orte_universe_info.scriptfile);
orte_universe_info.scriptfile = NULL;
}
orte_universe_info.state = ORTE_UNIVERSE_STATE_PRE_INIT;
orte_universe_info.persistence = false;
orte_universe_info.console = false;
orte_universe_info.console_connected = false;
OBJ_DESTRUCT(&orte_universe_info);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -33,6 +33,9 @@
#include <sys/types.h>
#endif
#include "opal/class/opal_object.h"
#include "opal/class/opal_list.h"
#ifndef _ORTE_UNIV_INFO_H_
#define _ORTE_UNIV_INFO_H_
@ -53,6 +56,9 @@ extern "C" {
* instanced in ompi_rte_init.c */
struct orte_universe_t {
/** This is an object, so it must have a super */
opal_list_item_t super;
orte_universe_state_t state; /**< Indicates state of the universe */
char *name;
char *host;
@ -66,6 +72,8 @@ extern "C" {
};
typedef struct orte_universe_t orte_universe_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(orte_universe_t);
OMPI_DECLSPEC extern orte_universe_t orte_universe_info;

Просмотреть файл

@ -97,7 +97,7 @@ int orte_write_universe_setup_file(char *filename, orte_universe_t *info)
} else {
fprintf(fp, "%s\n", info->seed_uri);
}
fclose(fp);
return ORTE_SUCCESS;
@ -109,9 +109,6 @@ int orte_read_universe_setup_file(char *filename, orte_universe_t *info)
FILE *fp;
int rc;
/* initialize the universe structure */
memset(info, 0, sizeof(orte_universe_t));
fp = fopen(filename, "r");
if (NULL == fp) { /* failed on first read - wait and try again */
fp = fopen(filename, "r");