1
1

Merge pull request #2042 from artpol84/pmix_sdirs

Several fixes related to session directories:
Этот коммит содержится в:
Artem Polyakov 2016-09-07 14:15:47 +07:00 коммит произвёл GitHub
родитель be41b120d0 a9a7f39773
Коммит 9eba1b0b75
18 изменённых файлов: 381 добавлений и 566 удалений

Просмотреть файл

@ -75,6 +75,7 @@ BEGIN_C_DECLS
#define OPAL_PMIX_TMPDIR "pmix.tmpdir" // (char*) top-level tmp dir assigned to session
#define OPAL_PMIX_NSDIR "pmix.nsdir" // (char*) sub-tmpdir assigned to namespace
#define OPAL_PMIX_PROCDIR "pmix.pdir" // (char*) sub-nsdir assigned to proc
#define OPAL_PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories
/* information about relative ranks as assigned by the RM */
#define OPAL_PMIX_JOBID "pmix.jobid" // (uint32_t) jobid assigned by scheduler

Просмотреть файл

@ -136,10 +136,7 @@ int orte_ess_base_app_setup(bool db_restrict_local)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
orte_process_info.nodename));
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
orte_process_info.tmpdir_base,
orte_process_info.nodename,
ORTE_PROC_MY_NAME))) {
if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
ORTE_ERROR_LOG(ret);
error = "orte_session_dir";
goto error;
@ -149,29 +146,6 @@ int orte_ess_base_app_setup(bool db_restrict_local)
proc-specific session directory. */
opal_output_set_output_file_info(orte_process_info.proc_session_dir,
"output-", NULL, NULL);
/* store the session directory location */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(OPAL_PMIX_NSDIR);
kv.type = OPAL_STRING;
kv.data.string = strdup(orte_process_info.job_session_dir);
if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &kv))) {
ORTE_ERROR_LOG(ret);
OBJ_DESTRUCT(&kv);
error = "opal pmix put job sessiondir";
goto error;
}
OBJ_DESTRUCT(&kv);
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(OPAL_PMIX_PROCDIR);
kv.type = OPAL_STRING;
kv.data.string = strdup(orte_process_info.proc_session_dir);
if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &kv))) {
ORTE_ERROR_LOG(ret);
OBJ_DESTRUCT(&kv);
error = "opal pmix put proc sessiondir";
goto error;
}
OBJ_DESTRUCT(&kv);
}
/* Setup the communication infrastructure */
/*

Просмотреть файл

@ -237,10 +237,7 @@ int orte_ess_base_orted_setup(char **hosts)
/* take a pass thru the session directory code to fillin the
* tmpdir names - don't create anything yet
*/
if (ORTE_SUCCESS != (ret = orte_session_dir(false,
orte_process_info.tmpdir_base,
orte_process_info.nodename,
ORTE_PROC_MY_NAME))) {
if (ORTE_SUCCESS != (ret = orte_session_dir(false, ORTE_PROC_MY_NAME))) {
ORTE_ERROR_LOG(ret);
error = "orte_session_dir define";
goto error;
@ -250,10 +247,7 @@ int orte_ess_base_orted_setup(char **hosts)
*/
orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
/* now actually create the directory tree */
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
orte_process_info.tmpdir_base,
orte_process_info.nodename,
ORTE_PROC_MY_NAME))) {
if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
ORTE_ERROR_LOG(ret);
error = "orte_session_dir";
goto error;
@ -277,11 +271,8 @@ int orte_ess_base_orted_setup(char **hosts)
/* define a log file name in the session directory */
snprintf(log_file, PATH_MAX, "output-orted-%s-%s.log",
jobidstring, orte_process_info.nodename);
log_path = opal_os_path(false,
orte_process_info.tmpdir_base,
orte_process_info.top_session_dir,
log_file,
NULL);
log_path = opal_os_path(false, orte_process_info.top_session_dir,
log_file, NULL);
fd = open(log_path, O_RDWR|O_CREAT|O_TRUNC, 0640);
if (fd < 0) {

Просмотреть файл

@ -145,10 +145,9 @@ int orte_ess_base_tool_setup(void)
* tmp base where any other session directories on
* this node might be located
*/
if (ORTE_SUCCESS != (ret = orte_session_dir_get_name(NULL,
&orte_process_info.tmpdir_base,
&orte_process_info.top_session_dir,
orte_process_info.nodename, NULL))) {
ret = orte_session_setup_base(NULL);
if (ORTE_SUCCESS != ret ) {
ORTE_ERROR_LOG(ret);
error = "define session dir names";
goto error;

Просмотреть файл

@ -138,7 +138,7 @@ static int rte_init(void)
{
int ret;
char *error = NULL;
char *contact_path, *jobfam_dir;
char *contact_path;
orte_job_t *jdata;
orte_node_t *node;
orte_proc_t *proc;
@ -294,10 +294,7 @@ static int rte_init(void)
/* take a pass thru the session directory code to fillin the
* tmpdir names - don't create anything yet
*/
if (ORTE_SUCCESS != (ret = orte_session_dir(false,
orte_process_info.tmpdir_base,
orte_process_info.nodename,
ORTE_PROC_MY_NAME))) {
if (ORTE_SUCCESS != (ret = orte_session_dir(false, ORTE_PROC_MY_NAME))) {
error = "orte_session_dir define";
goto error;
}
@ -307,10 +304,7 @@ static int rte_init(void)
orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
/* now actually create the directory tree */
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
orte_process_info.tmpdir_base,
orte_process_info.nodename,
ORTE_PROC_MY_NAME))) {
if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
error = "orte_session_dir";
goto error;
}
@ -586,9 +580,12 @@ static int rte_init(void)
opal_output_set_output_file_info(orte_process_info.proc_session_dir,
"output-", NULL, NULL);
/* save my contact info in a file for others to find */
jobfam_dir = opal_dirname(orte_process_info.job_session_dir);
contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL);
free(jobfam_dir);
if( NULL == orte_process_info.jobfam_session_dir ){
/* has to be set here! */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
goto error;
}
contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir, "contact.txt", NULL);
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
"%s writing contact file %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@ -758,10 +755,9 @@ static int rte_init(void)
true, error, ORTE_ERROR_NAME(ret), ret);
}
/* remove my contact info file, if we have session directories */
if (NULL != orte_process_info.job_session_dir) {
jobfam_dir = opal_dirname(orte_process_info.job_session_dir);
contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL);
free(jobfam_dir);
if (NULL != orte_process_info.jobfam_session_dir) {
contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir,
"contact.txt", NULL);
unlink(contact_path);
free(contact_path);
}
@ -775,7 +771,6 @@ static int rte_init(void)
static int rte_finalize(void)
{
char *contact_path;
char *jobfam_dir;
if (signals_set) {
/* Remove the epipe handler */
@ -816,10 +811,9 @@ static int rte_finalize(void)
(void) mca_base_framework_close(&opal_pstat_base_framework);
/* remove my contact info file, if we have session directories */
if (NULL != orte_process_info.job_session_dir) {
jobfam_dir = opal_dirname(orte_process_info.job_session_dir);
contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL);
free(jobfam_dir);
if (NULL != orte_process_info.jobfam_session_dir) {
contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir,
"contact.txt", NULL);
unlink(contact_path);
free(contact_path);
}

Просмотреть файл

@ -94,6 +94,7 @@ static int rte_init(void)
uint16_t u16, *u16ptr;
char **peers=NULL, *mycpuset, **cpusets=NULL;
opal_process_name_t wildcard_rank, pname;
bool bool_val, *bool_ptr = &bool_val, tdir_mca_override = false;
size_t i;
/* run the prolog */
@ -242,6 +243,63 @@ static int rte_init(void)
free(string_key);
}
/* retrieve temp directories info */
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TMPDIR, &wildcard_rank, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
/* We want to provide user with ability
* to override RM settings at his own risk
*/
if( NULL == orte_process_info.top_session_dir ){
orte_process_info.top_session_dir = val;
} else {
/* keep the MCA setting */
tdir_mca_override = true;
free(val);
}
val = NULL;
}
if( !tdir_mca_override ){
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_NSDIR, &wildcard_rank, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
/* We want to provide user with ability
* to override RM settings at his own risk
*/
if( NULL == orte_process_info.job_session_dir ){
orte_process_info.job_session_dir = val;
} else {
/* keep the MCA setting */
free(val);
tdir_mca_override = true;
}
val = NULL;
}
}
if( !tdir_mca_override ){
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_PROCDIR, &wildcard_rank, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
/* We want to provide user with ability
* to override RM settings at his own risk
*/
if( NULL == orte_process_info.proc_session_dir ){
orte_process_info.proc_session_dir = val;
} else {
/* keep the MCA setting */
tdir_mca_override = true;
free(val);
}
val = NULL;
}
}
if( !tdir_mca_override ){
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TDIR_RMCLEAN, &wildcard_rank, &bool_ptr, OPAL_BOOL);
if (OPAL_SUCCESS == ret ) {
orte_process_info.rm_session_dirs = bool_val;
}
}
/* retrieve our topology */
val = NULL;
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO,

Просмотреть файл

@ -105,6 +105,17 @@ static void recv_ack(int status, orte_process_name_t* sender,
void* cbdata);
static void write_handler(int fd, short event, void *cbdata);
/*
 * Select the directory in which the filem/raw component stages files.
 *
 * Returns orte_process_info.jobfam_session_dir when it is set, otherwise
 * orte_process_info.job_session_dir. The returned pointer aliases the
 * orte_process_info field (it is not a copy, so the caller must not free
 * it) and is NULL when neither directory has been set.
 */
static char *filem_session_dir()
{
    if (NULL != orte_process_info.jobfam_session_dir) {
        return orte_process_info.jobfam_session_dir;
    }
    /* no job family session dir was provided - fall back to the
     * job session dir */
    return orte_process_info.job_session_dir;
}
static int raw_init(void)
{
OBJ_CONSTRUCT(&incoming_files, opal_list_t);
@ -657,25 +668,26 @@ static int create_link(char *my_dir, char *path,
static int raw_link_local_files(orte_job_t *jdata,
orte_app_context_t *app)
{
char *my_dir, *path=NULL;
char *session_dir, *path=NULL;
orte_proc_t *proc;
char *prefix;
int i, j, rc;
orte_filem_raw_incoming_t *inbnd;
opal_list_item_t *item;
char **files=NULL, *bname, *filestring;
/* check my session directory for files I have received and
/* check my jobfam session directory for files I have received and
* symlink them to the proc-level session directory of each
* local process in the job
*
* TODO: @rhc - please check that I've correctly interpreted your
* intention here
*/
my_dir = opal_dirname(orte_process_info.job_session_dir);
/* setup */
if (NULL != orte_process_info.tmpdir_base) {
prefix = strdup(orte_process_info.tmpdir_base);
} else {
prefix = NULL;
session_dir = filem_session_dir();
if( NULL == session_dir){
/* we were unable to find any suitable directory */
rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc);
return rc;
}
/* get the list of files this app wants */
@ -692,10 +704,6 @@ static int raw_link_local_files(orte_job_t *jdata,
/* if there are no files to link, then ignore this */
if (NULL == files) {
free(my_dir);
if (NULL != prefix) {
free(prefix);
}
return ORTE_SUCCESS;
}
@ -736,10 +744,8 @@ static int raw_link_local_files(orte_job_t *jdata,
ORTE_NAME_PRINT(&proc->name)));
/* get the session dir name in absolute form */
path = NULL;
rc = orte_session_dir_get_name(&path, &prefix, NULL,
orte_process_info.nodename,
&proc->name);
path = orte_process_info.proc_session_dir;
/* create it, if it doesn't already exist */
if (OPAL_SUCCESS != (rc = opal_os_dirpath_create(path, S_IRWXU))) {
ORTE_ERROR_LOG(rc);
@ -747,11 +753,6 @@ static int raw_link_local_files(orte_job_t *jdata,
* create it - either way, we are done
*/
free(files);
if (NULL != prefix) {
free(prefix);
}
free(path);
free(my_dir);
return rc;
}
@ -775,13 +776,8 @@ static int raw_link_local_files(orte_job_t *jdata,
inbnd->file));
/* cycle thru the link points and create symlinks to them */
for (j=0; NULL != inbnd->link_pts[j]; j++) {
if (ORTE_SUCCESS != (rc = create_link(my_dir, path, inbnd->link_pts[j]))) {
if (ORTE_SUCCESS != (rc = create_link(session_dir, path, inbnd->link_pts[j]))) {
ORTE_ERROR_LOG(rc);
free(my_dir);
free(path);
if (NULL != prefix) {
free(prefix);
}
free(files);
return rc;
}
@ -796,13 +792,8 @@ static int raw_link_local_files(orte_job_t *jdata,
}
}
}
free(path);
}
opal_argv_free(files);
if (NULL != prefix) {
free(prefix);
}
free(my_dir);
return ORTE_SUCCESS;
}
@ -999,7 +990,7 @@ static void recv_files(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata)
{
char *file, *jobfam_dir;
char *file, *session_dir;
int32_t nchunk, n, nbytes;
unsigned char data[ORTE_FILEM_RAW_CHUNK_MAX];
int rc;
@ -1086,9 +1077,9 @@ static void recv_files(int status, orte_process_name_t* sender,
incoming->top = strdup(tmp);
free(tmp);
/* define the full path to where we will put it */
jobfam_dir = opal_dirname(orte_process_info.job_session_dir);
incoming->fullpath = opal_os_path(false, jobfam_dir, file, NULL);
free(jobfam_dir);
session_dir = filem_session_dir();
incoming->fullpath = opal_os_path(false, session_dir, file, NULL);
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_framework.framework_output,
"%s filem:raw: opening target file %s",

Просмотреть файл

@ -164,8 +164,7 @@ static int component_available(void)
/* if session directories were forbidden, then we cannot be used */
if (!orte_create_session_dirs ||
NULL == orte_process_info.tmpdir_base ||
NULL == orte_process_info.top_session_dir) {
NULL == orte_process_info.jobfam_session_dir ) {
return ORTE_ERR_NOT_SUPPORTED;
}
@ -216,9 +215,7 @@ static int component_startup(void)
/* setup the path to the daemon rendezvous point */
memset(&mca_oob_usock_component.address, 0, sizeof(struct sockaddr_un));
mca_oob_usock_component.address.sun_family = AF_UNIX;
session = opal_os_path(false, orte_process_info.tmpdir_base,
orte_process_info.top_session_dir,
orte_process_info.jobfam_session_dir,
session = opal_os_path(false, orte_process_info.jobfam_session_dir,
"usock", NULL);
if ((strlen(session) + 1) > sizeof(mca_oob_usock_component.address.sun_path)-1) {
opal_output(0, "SESSION DIR TOO LONG");

Просмотреть файл

@ -943,6 +943,7 @@ static int setup_fork(orte_job_t *jdata,
/* forcibly set the local tmpdir base and top session dir to match ours */
opal_setenv("OMPI_MCA_orte_tmpdir_base", orte_process_info.tmpdir_base, true, &app->env);
/* TODO: should we use PMIx key to pass this data? */
opal_setenv("OMPI_MCA_orte_top_session_dir", orte_process_info.top_session_dir, true, &app->env);
opal_setenv("OMPI_MCA_orte_jobfam_session_dir", orte_process_info.jobfam_session_dir, true, &app->env);
@ -1102,24 +1103,8 @@ static int setup_child(orte_job_t *jdata,
ORTE_FLAG_SET(child, ORTE_PROC_FLAG_IOF_COMPLETE);
}
/* construct the proc's session dir name */
if (NULL != orte_process_info.tmpdir_base) {
value = strdup(orte_process_info.tmpdir_base);
} else {
value = NULL;
}
param = NULL;
if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(&param, &value, NULL,
orte_process_info.nodename,
&child->name))) {
ORTE_ERROR_LOG(rc);
if (NULL != value) {
free(value);
}
return rc;
}
free(value);
/* pass an envar so the proc can find any files it had prepositioned */
param = orte_process_info.proc_session_dir;
opal_setenv("OMPI_FILE_LOCATION", param, true, &app->env);
/* if the user wanted the cwd to be the proc's session dir, then
@ -1132,12 +1117,10 @@ static int setup_child(orte_job_t *jdata,
/* doesn't exist with correct permissions, and/or we can't
* create it - either way, we are done
*/
free(param);
return rc;
}
/* change to it */
if (0 != chdir(param)) {
free(param);
return ORTE_ERROR;
}
/* It seems that chdir doesn't
@ -1154,6 +1137,5 @@ static int setup_child(orte_job_t *jdata,
/* update the initial wdir value too */
opal_setenv("OMPI_MCA_initial_wdir", param, true, &app->env);
}
free(param);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -2134,7 +2134,8 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata)
/* create the attachment FIFO and setup readevent - cannot be
* done if no session dirs exist!
*/
attach_fifo = opal_os_path(false, orte_process_info.job_session_dir, "debugger_attach_fifo", NULL);
attach_fifo = opal_os_path(false, orte_process_info.job_session_dir,
"debugger_attach_fifo", NULL);
if ((mkfifo(attach_fifo, FILE_MODE) < 0) && errno != EEXIST) {
opal_output(0, "CANNOT CREATE FIFO %s: errno %d", attach_fifo, errno);
free(attach_fifo);

Просмотреть файл

@ -262,9 +262,7 @@ int pmix_server_init(void)
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_SERVER_TMPDIR);
kv->type = OPAL_STRING;
kv->data.string = opal_os_path(false, orte_process_info.tmpdir_base,
orte_process_info.top_session_dir,
orte_process_info.jobfam_session_dir, NULL);
kv->data.string = opal_os_path(false, orte_process_info.jobfam_session_dir, NULL);
opal_list_append(&info, &kv->super);
/* use the same for the system temp directory - this is
* where the system-level tool connections will go */

Просмотреть файл

@ -310,7 +310,7 @@ static int orte_cr_coord_post_restart(void) {
* Add the previous session directory for cleanup
*/
opal_crs_base_cleanup_append(orte_process_info.job_session_dir, true);
tmp_dir = opal_dirname(orte_process_info.job_session_dir);
tmp_dir = orte_process_info.jobfam_session_dir;
if( NULL != tmp_dir ) {
opal_crs_base_cleanup_append(tmp_dir, true);
free(tmp_dir);

Просмотреть файл

@ -160,7 +160,7 @@ int orte_register_params(void)
&orte_top_session_dir);
if (NULL != orte_top_session_dir) {
if (NULL != orte_process_info.top_session_dir) {
if (NULL != orte_process_info.top_session_dir) {
free(orte_process_info.top_session_dir);
}
orte_process_info.top_session_dir = strdup(orte_top_session_dir);

Просмотреть файл

@ -182,7 +182,7 @@ int orte_list_local_hnps(opal_list_t *hnps, bool connect)
/*
* Check to make sure we have access to the top-level directory
*/
headdir = opal_os_path(false, orte_process_info.tmpdir_base, orte_process_info.top_session_dir, NULL);
headdir = orte_process_info.top_session_dir;
if( ORTE_SUCCESS != (ret = opal_os_dirpath_access(headdir, 0) )) {
/* it is okay not to find this as there may not be any
@ -231,7 +231,6 @@ int orte_list_local_hnps(opal_list_t *hnps, bool connect)
cleanup:
if( NULL != cur_dirp )
closedir(cur_dirp);
free(headdir);
return (opal_list_is_empty(hnps) ? ORTE_ERR_NOT_FOUND : ORTE_SUCCESS);
}

Просмотреть файл

@ -106,6 +106,14 @@ int orte_proc_info(void)
if (init) {
return ORTE_SUCCESS;
}
{
int delay = 0;
while( delay ){
sleep(1);
}
}
init = true;
OBJ_CONSTRUCT(&orte_process_info.super, opal_proc_t);

Просмотреть файл

@ -122,6 +122,7 @@ struct orte_proc_info_t {
char *jobfam_session_dir; /**< Session directory for this family of jobs (i.e., share same mpirun) */
char *job_session_dir; /**< Session directory for job */
char *proc_session_dir; /**< Session directory for the process */
bool rm_session_dirs; /**< Session directories will be cleaned up by RM */
char *sock_stdin; /**< Path name to temp file for stdin. */
char *sock_stdout; /**< Path name to temp file for stdout. */

Просмотреть файл

@ -73,10 +73,6 @@ static int orte_create_dir(char *directory);
static bool orte_dir_check_file(const char *root, const char *path);
static char *orte_build_job_session_dir(char *top_dir,
orte_process_name_t *proc,
orte_jobid_t jobid);
#define OMPI_PRINTF_FIX_STRING(a) ((NULL == a) ? "(null)" : a)
/****************************
@ -112,175 +108,186 @@ static int orte_create_dir(char *directory)
return ret;
}
/*
* Construct the fullpath to the session directory - it
* will consist of "ompi.<hostname>.<effective-uid>", and
* have subdirs:
*
* pid - the pid of the mpirun that oversees this job. Note
* that direct-launched processes will have manufactured
* this value
*
* jobid - jobid of the application being executed
*
* vpid - vpid of the process
*/
int
orte_session_dir_get_name(char **fulldirpath,
char **return_prefix, /* This will come back as the valid tmp dir */
char **return_frontend,
char *hostid,
orte_process_name_t *proc) {
char *hostname = NULL,
*sessions = NULL,
*prefix = NULL,
*frontend = NULL,
*jobfam = NULL,
*job = NULL,
*vpidstr = NULL;
bool prefix_provided = false;
int exit_status = ORTE_SUCCESS;
size_t len;
uid_t uid;
/*
 * Make sure orte_process_info.tmpdir_base is populated.
 *
 * An already-set value is left untouched. Otherwise the field receives a
 * strdup'ed copy of whatever opal_tmp_directory() returns.
 *
 * Returns ORTE_SUCCESS, or ORTE_ERR_OUT_OF_RESOURCE (after logging it)
 * when the copy could not be allocated.
 */
static int _setup_tmpdir_base()
{
    int status;

    /* a base tmp directory was already provided - nothing to do */
    if (NULL != orte_process_info.tmpdir_base) {
        return ORTE_SUCCESS;
    }

    orte_process_info.tmpdir_base = strdup(opal_tmp_directory());
    if (NULL != orte_process_info.tmpdir_base) {
        return ORTE_SUCCESS;
    }

    status = ORTE_ERR_OUT_OF_RESOURCE;
    ORTE_ERROR_LOG(status);
    return status;
}
/*
 * Make sure orte_process_info.top_session_dir is populated.
 *
 * An already-set value is left untouched. Otherwise the directory name is
 * built as "<tmpdir_base>/ompi.<nodename>.<effective-uid>", setting up
 * tmpdir_base first if needed.
 *
 * Returns ORTE_SUCCESS on success; the error from _setup_tmpdir_base() if
 * that step fails; ORTE_ERR_BAD_PARAM if nodename or tmpdir_base is still
 * unset; ORTE_ERR_OUT_OF_RESOURCE if the string cannot be allocated (the
 * field is reset to NULL in that case). The latter two are logged here.
 */
static int _setup_top_session_dir()
{
    int status;

    /* a top session directory was already provided - nothing to do */
    if (NULL != orte_process_info.top_session_dir) {
        return ORTE_SUCCESS;
    }

    status = _setup_tmpdir_base();
    if (ORTE_SUCCESS != status) {
        /* not logged again here */
        return status;
    }

    /* both pieces are required to build the name */
    if (NULL == orte_process_info.tmpdir_base ||
        NULL == orte_process_info.nodename) {
        status = ORTE_ERR_BAD_PARAM;
        ORTE_ERROR_LOG(status);
        return status;
    }

    /* the effective uid keeps different users' directories apart */
    if (0 > asprintf(&orte_process_info.top_session_dir,
                     "%s/ompi.%s.%lu", orte_process_info.tmpdir_base,
                     orte_process_info.nodename,
                     (unsigned long)geteuid())) {
        orte_process_info.top_session_dir = NULL;
        status = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(status);
        return status;
    }

    return ORTE_SUCCESS;
}
/*
 * Make sure orte_process_info.jobfam_session_dir is populated.
 *
 * An already-set value is left untouched. Otherwise the name is built
 * beneath top_session_dir (which is set up first if needed) as:
 *   - "<top>/pid.<pid>"       when ORTE_PROC_IS_HNP is true
 *   - "<top>/jobfam"          when proc is NULL or its jobid is invalid
 *   - "<top>/jf.<job-family>" otherwise
 *
 * proc may be NULL.
 *
 * Returns ORTE_SUCCESS on success, the error from
 * _setup_top_session_dir() if that step fails, or
 * ORTE_ERR_OUT_OF_RESOURCE (logged) if the string cannot be allocated.
 */
static int _setup_jobfam_session_dir(orte_process_name_t *proc)
{
    int rc = ORTE_SUCCESS;
    int nchars;

    /* construct the jobfam_session_dir only if we need to */
    if (NULL != orte_process_info.jobfam_session_dir) {
        return ORTE_SUCCESS;
    }

    if (ORTE_SUCCESS != (rc = _setup_top_session_dir())) {
        return rc;
    }

    if (ORTE_PROC_IS_HNP) {
        nchars = asprintf(&orte_process_info.jobfam_session_dir,
                          "%s/pid.%lu", orte_process_info.top_session_dir,
                          (unsigned long)orte_process_info.pid);
    } else if (NULL == proc || (ORTE_JOBID_INVALID == proc->jobid)) {
        /* we were not given one, so define it */
        nchars = asprintf(&orte_process_info.jobfam_session_dir,
                          "%s/jobfam", orte_process_info.top_session_dir);
    } else {
        nchars = asprintf(&orte_process_info.jobfam_session_dir,
                          "%s/jf.%d", orte_process_info.top_session_dir,
                          ORTE_JOB_FAMILY(proc->jobid));
    }

    if (0 > nchars) {
        /* asprintf leaves the target pointer indeterminate on failure;
         * reset it on every branch so later NULL checks stay reliable */
        orte_process_info.jobfam_session_dir = NULL;
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}
/*
 * Make sure orte_process_info.job_session_dir is populated.
 *
 * An already-set value is left untouched. Otherwise the job family
 * session directory is set up first, and the name is built as
 * "<jobfam_session_dir>/<local-jobid>".
 *
 * proc may be NULL. When it is NULL, or its jobid is
 * ORTE_JOBID_INVALID, job_session_dir is left NULL and ORTE_SUCCESS is
 * returned.
 *
 * Returns ORTE_SUCCESS on success, the error from
 * _setup_jobfam_session_dir() if that step fails, or
 * ORTE_ERR_OUT_OF_RESOURCE (logged) if the string cannot be allocated.
 */
static int
_setup_job_session_dir(orte_process_name_t *proc)
{
    int rc = ORTE_SUCCESS;

    /* construct the job_session_dir only if we need to */
    if (NULL != orte_process_info.job_session_dir) {
        return ORTE_SUCCESS;
    }

    if (ORTE_SUCCESS != (rc = _setup_jobfam_session_dir(proc))) {
        return rc;
    }

    /* callers may pass NULL when only the base directories are wanted,
     * so proc must be checked before it is dereferenced */
    if (NULL == proc || ORTE_JOBID_INVALID == proc->jobid) {
        orte_process_info.job_session_dir = NULL;
        return ORTE_SUCCESS;
    }

    if (0 > asprintf(&orte_process_info.job_session_dir,
                     "%s/%d", orte_process_info.jobfam_session_dir,
                     ORTE_LOCAL_JOBID(proc->jobid))) {
        orte_process_info.job_session_dir = NULL;
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}
/*
 * Make sure orte_process_info.proc_session_dir is populated.
 *
 * An already-set value is left untouched. Otherwise the job session
 * directory is set up first, and the name is built as
 * "<job_session_dir>/<vpid>".
 *
 * proc may be NULL. proc_session_dir is left NULL, and ORTE_SUCCESS is
 * returned, when proc is NULL, when its vpid is ORTE_VPID_INVALID, or
 * when no job session directory is set to build on.
 *
 * Returns ORTE_SUCCESS on success, the error from
 * _setup_job_session_dir() if that step fails, or
 * ORTE_ERR_OUT_OF_RESOURCE (logged) if the string cannot be allocated.
 */
static int
_setup_proc_session_dir(orte_process_name_t *proc)
{
    int rc = ORTE_SUCCESS;

    /* construct the proc_session_dir only if we need to */
    if (NULL != orte_process_info.proc_session_dir) {
        return ORTE_SUCCESS;
    }

    if (ORTE_SUCCESS != (rc = _setup_job_session_dir(proc))) {
        return rc;
    }

    /* proc may be NULL, and job_session_dir stays NULL for an invalid
     * jobid; neither may be dereferenced or passed to a "%s" conversion */
    if (NULL == proc ||
        ORTE_VPID_INVALID == proc->vpid ||
        NULL == orte_process_info.job_session_dir) {
        orte_process_info.proc_session_dir = NULL;
        return ORTE_SUCCESS;
    }

    /* NOTE(review): vpid is formatted with %d - confirm that the vpid
     * type is int-compatible for the range of values in use */
    if (0 > asprintf(&orte_process_info.proc_session_dir,
                     "%s/%d", orte_process_info.job_session_dir,
                     proc->vpid)) {
        orte_process_info.proc_session_dir = NULL;
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}
int orte_session_setup_base(orte_process_name_t *proc)
{
int rc;
/* Ensure that system info is set */
orte_proc_info();
/* get the effective uid */
uid = geteuid();
/*
* set the 'hostname'
*/
if( NULL != hostid) { /* User specified version */
hostname = strdup(hostid);
}
else { /* check if it is set elsewhere */
if( NULL != orte_process_info.nodename)
hostname = strdup(orte_process_info.nodename);
else {
/* Couldn't find it, so fail */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
exit_status = ORTE_ERR_BAD_PARAM;
goto cleanup;
}
/* setup job and proc session directories */
if( ORTE_SUCCESS != (rc = _setup_job_session_dir(proc)) ){
return rc;
}
/* construct the frontend of the session directory*/
if (NULL != orte_process_info.top_session_dir) {
frontend = strdup(orte_process_info.top_session_dir);
} else { /* If not set then construct it */
if (0 > asprintf(&frontend, "ompi.%s.%lu", hostname, (unsigned long)uid)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
}
/* construct the next level down, which belongs to the
* job family. This is related to the mpirun that launched
* the job, or is an arbitrary (agreed upon) value if
* direct launched */
if (ORTE_PROC_IS_HNP) {
if (0 > asprintf(&jobfam, "pid.%lu", (unsigned long)orte_process_info.pid)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
orte_process_info.jobfam_session_dir = strdup(jobfam);
} else if (NULL != orte_process_info.jobfam_session_dir) {
/* we had a job family session dir passed down to us by mpirun */
jobfam = strdup(orte_process_info.jobfam_session_dir);
} else {
/* we were not given one, so define it */
if (NULL == proc) {
jobfam = strdup("jobfam");
} else {
if (0 > asprintf(&jobfam, "jf.%d", ORTE_JOB_FAMILY(proc->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
}
orte_process_info.jobfam_session_dir = strdup(jobfam);
}
/*
* Construct the session directory
*/
/* If we were given a valid vpid then we can construct it fully */
if( NULL != proc) {
if (ORTE_VPID_INVALID != proc->vpid) {
if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
if (ORTE_SUCCESS != orte_util_convert_vpid_to_string(&vpidstr, proc->vpid)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
sessions = opal_os_path(false, frontend, jobfam, job, vpidstr, NULL);
if( NULL == sessions ) {
ORTE_ERROR_LOG(ORTE_ERROR);
exit_status = ORTE_ERROR;
goto cleanup;
}
} else if (ORTE_JOBID_INVALID != proc->jobid) {
if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
sessions = opal_os_path( false, frontend, jobfam, job, NULL );
if( NULL == sessions ) {
ORTE_ERROR_LOG(ORTE_ERROR);
exit_status = ORTE_ERROR;
goto cleanup;
}
} else {
sessions = strdup(frontend); /* must dup this to avoid double-free later */
}
} else {
/* If we were not given a proc at all, then we just set it to frontend */
sessions = strdup(frontend); /* must dup this to avoid double-free later */
}
/*
* If the user specified an invalid prefix, or no prefix at all
* we need to keep looking
*/
if( NULL != fulldirpath && NULL != *fulldirpath) {
free(*fulldirpath);
*fulldirpath = NULL;
}
if( NULL != return_prefix && NULL != *return_prefix) { /* use the user specified one, if available */
prefix = strdup(*return_prefix);
prefix_provided = true;
}
/* Try to find a proper alternative prefix */
else if (NULL != orte_process_info.tmpdir_base) { /* stored value */
prefix = strdup(orte_process_info.tmpdir_base);
}
else { /* General Environment var */
prefix = strdup(opal_tmp_directory());
}
len = strlen(prefix);
/* check for a trailing path separator */
if (OPAL_PATH_SEP[0] == prefix[len-1]) {
prefix[len-1] = '\0';
if( ORTE_SUCCESS != (rc = _setup_proc_session_dir(proc)) ){
return rc;
}
/* BEFORE doing anything else, check to see if this prefix is
* allowed by the system
*/
if (NULL != orte_prohibited_session_dirs) {
if (NULL != orte_prohibited_session_dirs ||
NULL != orte_process_info.tmpdir_base ) {
char **list;
int i, len;
/* break the string into tokens - it should be
@ -291,97 +298,36 @@ orte_session_dir_get_name(char **fulldirpath,
/* cycle through the list */
for (i=0; i < len; i++) {
/* check if prefix matches */
if (0 == strncmp(prefix, list[i], strlen(list[i]))) {
if (0 == strncmp(orte_process_info.tmpdir_base, list[i], strlen(list[i]))) {
/* this is a prohibited location */
orte_show_help("help-orte-runtime.txt",
"orte:session:dir:prohibited",
true, prefix, orte_prohibited_session_dirs);
true, orte_process_info.tmpdir_base,
orte_prohibited_session_dirs);
opal_argv_free(list);
free(prefix);
free(sessions);
free(hostname);
free(frontend);
return ORTE_ERR_FATAL;
}
}
opal_argv_free(list); /* done with this */
}
/*
* Construct the absolute final path, if requested
*/
if (NULL != fulldirpath) {
*fulldirpath = opal_os_path(false, prefix, sessions, NULL);
}
/*
* Return the frontend and prefix, if user requested we do so
*/
if (NULL != return_frontend) {
*return_frontend = strdup(frontend);
}
if (!prefix_provided && NULL != return_prefix) {
*return_prefix = strdup(prefix);
}
cleanup:
if(NULL != hostname) {
free(hostname);
}
if(NULL != sessions) {
free(sessions);
}
if (NULL != prefix) {
free(prefix);
}
if (NULL != frontend) {
free(frontend);
}
if (NULL != jobfam) {
free(jobfam);
}
if (NULL != job) {
free(job);
}
if (NULL != vpidstr) {
free(vpidstr);
}
return exit_status;
return ORTE_SUCCESS;
}
/*
* Construct the session directory and create it if necessary
*/
int orte_session_dir(bool create,
char *prefix, char *hostid,
orte_process_name_t *proc)
int orte_session_dir(bool create, orte_process_name_t *proc)
{
char *fulldirpath = NULL,
*frontend = NULL,
*sav = NULL;
int rc = ORTE_SUCCESS;
char *local_prefix = NULL;
/* use the specified prefix, if one was given */
if (NULL != prefix) {
local_prefix = strdup(prefix);
}
/*
* Get the session directory full name
*/
if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(&fulldirpath,
&local_prefix,
&frontend,
hostid,
proc))) {
if (ORTE_SUCCESS != (rc = orte_session_setup_base(proc))) {
if (ORTE_ERR_FATAL == rc) {
/* this indicates we should abort quietly */
rc = ORTE_ERR_SILENT;
goto cleanup;
}
/* otherwise, bark a little first */
ORTE_ERROR_LOG(rc);
goto cleanup;
}
@ -389,73 +335,26 @@ int orte_session_dir(bool create,
* Now that we have the full path, go ahead and create it if necessary
*/
if( create ) {
if( ORTE_SUCCESS != (rc = orte_create_dir(fulldirpath) ) ) {
if( ORTE_SUCCESS != (rc = orte_create_dir(orte_process_info.proc_session_dir)) ) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
}
/* update global structure fields */
if (NULL != orte_process_info.tmpdir_base) {
free(orte_process_info.tmpdir_base);
}
orte_process_info.tmpdir_base = strdup(local_prefix);
if (NULL != orte_process_info.top_session_dir) {
free(orte_process_info.top_session_dir);
orte_process_info.top_session_dir = NULL;
}
if (NULL != frontend) {
orte_process_info.top_session_dir = strdup(frontend);
}
/*
* Set the process session directory
*/
if (ORTE_VPID_INVALID != proc->vpid) {
if (NULL != orte_process_info.proc_session_dir) {
free(orte_process_info.proc_session_dir);
}
orte_process_info.proc_session_dir = strdup(fulldirpath);
/* Strip off last part of directory structure */
sav = opal_dirname(fulldirpath);
free(fulldirpath);
fulldirpath = sav;
sav = NULL;
}
/*
* Set the job session directory
*/
if (ORTE_JOBID_INVALID != proc->jobid) {
if (NULL != orte_process_info.job_session_dir) {
free(orte_process_info.job_session_dir);
}
orte_process_info.job_session_dir = strdup(fulldirpath);
}
if (orte_debug_flag) {
opal_output(0, "procdir: %s",
OMPI_PRINTF_FIX_STRING(orte_process_info.proc_session_dir));
opal_output(0, "jobdir: %s",
OMPI_PRINTF_FIX_STRING(orte_process_info.job_session_dir));
opal_output(0, "top: %s",
opal_output(0, "top: %s",
OMPI_PRINTF_FIX_STRING(orte_process_info.jobfam_session_dir));
opal_output(0, "top: %s",
OMPI_PRINTF_FIX_STRING(orte_process_info.top_session_dir));
opal_output(0, "tmp: %s",
OMPI_PRINTF_FIX_STRING(orte_process_info.tmpdir_base));
}
cleanup:
if (NULL != local_prefix) {
free(local_prefix);
}
if(NULL != fulldirpath) {
free(fulldirpath);
}
if(NULL != frontend) {
free(frontend);
}
return rc;
}
@ -466,16 +365,14 @@ int
orte_session_dir_cleanup(orte_jobid_t jobid)
{
int rc = ORTE_SUCCESS;
char *tmp = NULL;
char *job_session_dir=NULL;
if (!orte_create_session_dirs) {
/* didn't create them */
if (!orte_create_session_dirs || orte_process_info.rm_session_dirs ) {
/* we haven't created them or RM will clean them up for us*/
return ORTE_SUCCESS;
}
if (NULL == orte_process_info.tmpdir_base &&
NULL == orte_process_info.top_session_dir) {
if (NULL == orte_process_info.job_session_dir ||
NULL == orte_process_info.proc_session_dir) {
/* this should never happen - it means we are calling
* cleanup *before* properly setting up the session
* dir system. This leaves open the possibility of
@ -486,37 +383,30 @@ orte_session_dir_cleanup(orte_jobid_t jobid)
goto CLEANUP;
}
/* need to setup the top_session_dir with the prefix */
tmp = opal_os_path(false,
orte_process_info.tmpdir_base,
orte_process_info.top_session_dir, NULL);
/* we can only blow away session directories for our job family */
job_session_dir = orte_build_job_session_dir(tmp, ORTE_PROC_MY_NAME, jobid);
if (NULL == job_session_dir) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/* recursively blow the whole session away for our job family,
* saving only output files
*/
opal_os_dirpath_destroy(job_session_dir, true, orte_dir_check_file);
opal_os_dirpath_destroy(orte_process_info.job_session_dir,
true, orte_dir_check_file);
/* now attempt to eliminate the top level directory itself - this
* will fail if anything is present, but ensures we cleanup if
* we are the last one out
*/
opal_os_dirpath_destroy(tmp, false, orte_dir_check_file);
if( NULL != orte_process_info.top_session_dir ){
opal_os_dirpath_destroy(orte_process_info.top_session_dir,
false, orte_dir_check_file);
}
if (NULL != job_session_dir && opal_os_dirpath_is_empty(job_session_dir)) {
if (opal_os_dirpath_is_empty(orte_process_info.job_session_dir)) {
if (orte_debug_flag) {
opal_output(0, "sess_dir_cleanup: found job session dir empty - deleting");
}
rmdir(job_session_dir);
rmdir(orte_process_info.job_session_dir);
} else {
if (orte_debug_flag) {
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(job_session_dir, 0)) {
if (OPAL_ERR_NOT_FOUND ==
opal_os_dirpath_access(orte_process_info.job_session_dir, 0)) {
opal_output(0, "sess_dir_cleanup: job session dir does not exist");
} else {
opal_output(0, "sess_dir_cleanup: job session dir not empty - leaving");
@ -525,24 +415,27 @@ orte_session_dir_cleanup(orte_jobid_t jobid)
goto CLEANUP;
}
if (opal_os_dirpath_is_empty(tmp)) {
if (orte_debug_flag) {
opal_output(0, "sess_dir_cleanup: found top session dir empty - deleting");
}
rmdir(tmp);
} else {
if (orte_debug_flag) {
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(tmp, 0)) {
opal_output(0, "sess_dir_cleanup: top session dir does not exist");
} else {
opal_output(0, "sess_dir_cleanup: top session dir not empty - leaving");
if ( NULL != orte_process_info.top_session_dir ){
if( opal_os_dirpath_is_empty(orte_process_info.top_session_dir) ) {
if (orte_debug_flag) {
opal_output(0, "sess_dir_cleanup: found top session dir empty - deleting");
}
}
rmdir(orte_process_info.top_session_dir);
} else {
if (orte_debug_flag) {
if (OPAL_ERR_NOT_FOUND ==
opal_os_dirpath_access(orte_process_info.top_session_dir, 0)) {
opal_output(0, "sess_dir_cleanup: top session dir does not exist");
} else {
opal_output(0, "sess_dir_cleanup: top session dir not empty - leaving");
}
}
}
}
CLEANUP:
if (NULL != tmp) free(tmp);
if (NULL != job_session_dir) free(job_session_dir);
return rc;
}
@ -551,66 +444,42 @@ int
orte_session_dir_finalize(orte_process_name_t *proc)
{
int rc;
char *tmp;
char *job_session_dir, *vpid, *proc_session_dir;
if (!orte_create_session_dirs) {
/* didn't create them */
if (!orte_create_session_dirs || orte_process_info.rm_session_dirs ) {
/* we haven't created them or RM will clean them up for us*/
return ORTE_SUCCESS;
}
if (NULL == orte_process_info.tmpdir_base &&
NULL == orte_process_info.top_session_dir) {
if (NULL == orte_process_info.job_session_dir ||
NULL == orte_process_info.proc_session_dir) {
/* this should never happen - it means we are calling
* cleanup *before* properly setting up the session
* dir system. Protect against the possibility of
* dir system. This leaves open the possibility of
* accidentally removing directories we shouldn't
* touch by returning
* touch
*/
return ORTE_ERR_NOT_INITIALIZED;
rc = ORTE_ERR_NOT_INITIALIZED;
goto CLEANUP;
}
/* need to setup the top_session_dir with the prefix */
tmp = opal_os_path(false,
orte_process_info.tmpdir_base,
orte_process_info.top_session_dir, NULL);
/* define the proc and job session directories for this process */
if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(&vpid, proc->vpid))) {
ORTE_ERROR_LOG(rc);
free(tmp);
return rc;
}
job_session_dir = orte_build_job_session_dir(tmp, proc, proc->jobid);
if( NULL == job_session_dir) {
free(tmp);
free(vpid);
return ORTE_ERR_OUT_OF_RESOURCE;
}
proc_session_dir = opal_os_path( false, job_session_dir, vpid, NULL );
if( NULL == proc_session_dir ) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
free(tmp);
free(vpid);
free(job_session_dir);
return ORTE_ERR_OUT_OF_RESOURCE;
}
opal_os_dirpath_destroy(proc_session_dir,
opal_os_dirpath_destroy(orte_process_info.proc_session_dir,
false, orte_dir_check_file);
opal_os_dirpath_destroy(job_session_dir,
false, orte_dir_check_file);
opal_os_dirpath_destroy(tmp,
opal_os_dirpath_destroy(orte_process_info.job_session_dir,
false, orte_dir_check_file);
if( NULL != orte_process_info.top_session_dir ){
opal_os_dirpath_destroy(orte_process_info.top_session_dir,
false, orte_dir_check_file);
}
if (opal_os_dirpath_is_empty(proc_session_dir)) {
if (opal_os_dirpath_is_empty(orte_process_info.proc_session_dir)) {
if (orte_debug_flag) {
opal_output(0, "sess_dir_finalize: found proc session dir empty - deleting");
}
rmdir(proc_session_dir);
rmdir(orte_process_info.proc_session_dir);
} else {
if (orte_debug_flag) {
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(proc_session_dir, 0)) {
if (OPAL_ERR_NOT_FOUND ==
opal_os_dirpath_access(orte_process_info.proc_session_dir, 0)) {
opal_output(0, "sess_dir_finalize: proc session dir does not exist");
} else {
opal_output(0, "sess_dir_finalize: proc session dir not empty - leaving");
@ -619,14 +488,15 @@ orte_session_dir_finalize(orte_process_name_t *proc)
goto CLEANUP;
}
if (opal_os_dirpath_is_empty(job_session_dir)) {
if (opal_os_dirpath_is_empty(orte_process_info.job_session_dir)) {
if (orte_debug_flag) {
opal_output(0, "sess_dir_finalize: found job session dir empty - deleting");
}
rmdir(job_session_dir);
rmdir(orte_process_info.job_session_dir);
} else {
if (orte_debug_flag) {
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(job_session_dir, 0)) {
if (OPAL_ERR_NOT_FOUND ==
opal_os_dirpath_access(orte_process_info.job_session_dir, 0)) {
opal_output(0, "sess_dir_finalize: job session dir does not exist");
} else {
opal_output(0, "sess_dir_finalize: job session dir not empty - leaving");
@ -635,26 +505,25 @@ orte_session_dir_finalize(orte_process_name_t *proc)
goto CLEANUP;
}
if (opal_os_dirpath_is_empty(tmp)) {
if (orte_debug_flag) {
opal_output(0, "sess_dir_finalize: found top session dir empty - deleting");
}
rmdir(tmp);
} else {
if (orte_debug_flag) {
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(tmp, 0)) {
opal_output(0, "sess_dir_finalize: top session dir does not exist");
} else {
opal_output(0, "sess_dir_finalize: top session dir not empty - leaving");
if(NULL != orte_process_info.top_session_dir) {
if (opal_os_dirpath_is_empty(orte_process_info.top_session_dir)) {
if (orte_debug_flag) {
opal_output(0, "sess_dir_finalize: found top session dir empty - deleting");
}
}
rmdir(orte_process_info.top_session_dir);
} else {
if (orte_debug_flag) {
if (OPAL_ERR_NOT_FOUND ==
opal_os_dirpath_access(orte_process_info.top_session_dir, 0)) {
opal_output(0, "sess_dir_finalize: top session dir does not exist");
} else {
opal_output(0, "sess_dir_finalize: top session dir not empty - leaving");
}
}
}
}
CLEANUP:
free(tmp);
free(vpid);
free(job_session_dir);
free(proc_session_dir);
return ORTE_SUCCESS;
}
@ -680,33 +549,3 @@ orte_dir_check_file(const char *root, const char *path)
return true;
}
/*
 * Build the path of a job-level session directory.
 *
 * When jobid is ORTE_JOBID_WILDCARD the path is composed of top_dir and
 * orte_process_info.jobfam_session_dir only; otherwise the local job id
 * (ORTE_LOCAL_JOBID(jobid), printed with "%d") is appended as a final
 * path component.
 *
 * top_dir  leading path component handed to opal_os_path()
 * proc     not referenced by this function
 * jobid    job id selecting between the two path layouts above
 *
 * Returns whatever opal_os_path() produced, or NULL when formatting the
 * job id or building the path failed; ORTE_ERR_OUT_OF_RESOURCE is logged
 * in both failure cases.
 * NOTE(review): the result is presumably heap-allocated and owned by the
 * caller - confirm against opal_os_path().
 */
static char *orte_build_job_session_dir(char *top_dir,
                                        orte_process_name_t *proc,
                                        orte_jobid_t jobid)
{
    char *path = NULL;
    char *jobstr = NULL;

    if (ORTE_JOBID_WILDCARD == jobid) {
        /* wildcard: stop at the job-family level */
        path = opal_os_path(false, top_dir,
                            orte_process_info.jobfam_session_dir, NULL);
    } else {
        if (asprintf(&jobstr, "%d", ORTE_LOCAL_JOBID(jobid)) < 0) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return NULL;
        }
        path = opal_os_path(false, top_dir,
                            orte_process_info.jobfam_session_dir, jobstr, NULL);
        /* the formatted job id is only needed while composing the path */
        free(jobstr);
    }

    if (NULL == path) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
    }
    return path;
}

Просмотреть файл

@ -99,19 +99,6 @@ BEGIN_C_DECLS
* locate an already existing universe for reconnection
* purposes. If set to "true", then the function
* creates the directory, if possible.
* @param prefix A string variable indicating where the user
* stipulated the directory should be found or
* placed. A value of "NULL" indicates that the user
* specified no location - hence, the function explores
* a range of "standard" locations.
* @param hostid Name of the host on which the session directory is
* being built. Used to build the name of the
* "openmpi-sessions-[user]@[host]:[batch]" branch of
* the directory tree. NULL indicates that the nodename
* found in orte_process_info is to be used.
* @param batchid Batch job name, used in batch scheduling
* systems. NULL indicates that the default of "0" is
* to be used.
* @param proc Pointer to a process name for which the session
* dir name is desired
*
@ -120,18 +107,13 @@ BEGIN_C_DECLS
* @retval OMPI_ERROR The directory cannot be found (if create is
* "false") or created (if create is "true").
*/
ORTE_DECLSPEC int orte_session_dir(bool create, char *prefix, char *hostid,
orte_process_name_t *proc);
ORTE_DECLSPEC int orte_session_dir(bool create, orte_process_name_t *proc);
/*
* Construct the session directory name from the input parameters.
* This function does no checking that the directory exists, or can be used
* Setup session-related directory paths
*/
ORTE_DECLSPEC int orte_session_dir_get_name(char **fulldirpath,
char **prfx,
char **frontend,
char *hostid,
orte_process_name_t *proc);
ORTE_DECLSPEC int orte_session_setup_base(orte_process_name_t *proc);
/** The orte_session_dir_finalize() function performs a cleanup of the
* session directory tree. It first removes the session directory for