From 81195ab7244c6b784f7cd7a153e55bfb8c7d3399 Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Thu, 1 Sep 2016 16:03:10 +0700 Subject: [PATCH] Several fixes related to session directories: * enable OMPI to retrieve paths from RM through PMIx * cleanups related to tempdirs. --- orte/mca/ess/base/ess_base_std_app.c | 28 +- orte/mca/ess/base/ess_base_std_orted.c | 17 +- orte/mca/ess/base/ess_base_std_tool.c | 7 +- orte/mca/ess/hnp/ess_hnp_module.c | 36 +- orte/mca/ess/pmi/ess_pmi_module.c | 37 ++ orte/mca/filem/raw/filem_raw_module.c | 67 +-- orte/mca/oob/usock/oob_usock_component.c | 7 +- orte/mca/schizo/ompi/schizo_ompi.c | 22 +- orte/orted/orted_submit.c | 3 +- orte/orted/pmix/pmix_server.c | 4 +- orte/runtime/orte_cr.c | 2 +- orte/runtime/orte_mca_params.c | 2 +- orte/util/hnp_contact.c | 3 +- orte/util/proc_info.c | 8 + orte/util/session_dir.c | 653 +++++++++-------------- orte/util/session_dir.h | 26 +- 16 files changed, 358 insertions(+), 564 deletions(-) diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index e9da311f16..770731f88a 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -136,10 +136,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base, orte_process_info.nodename)); - if (ORTE_SUCCESS != (ret = orte_session_dir(true, - orte_process_info.tmpdir_base, - orte_process_info.nodename, - ORTE_PROC_MY_NAME))) { + if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) { ORTE_ERROR_LOG(ret); error = "orte_session_dir"; goto error; @@ -149,29 +146,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) proc-specific session directory. 
*/ opal_output_set_output_file_info(orte_process_info.proc_session_dir, "output-", NULL, NULL); - /* store the session directory location */ - OBJ_CONSTRUCT(&kv, opal_value_t); - kv.key = strdup(OPAL_PMIX_NSDIR); - kv.type = OPAL_STRING; - kv.data.string = strdup(orte_process_info.job_session_dir); - if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &kv))) { - ORTE_ERROR_LOG(ret); - OBJ_DESTRUCT(&kv); - error = "opal pmix put job sessiondir"; - goto error; - } - OBJ_DESTRUCT(&kv); - OBJ_CONSTRUCT(&kv, opal_value_t); - kv.key = strdup(OPAL_PMIX_PROCDIR); - kv.type = OPAL_STRING; - kv.data.string = strdup(orte_process_info.proc_session_dir); - if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &kv))) { - ORTE_ERROR_LOG(ret); - OBJ_DESTRUCT(&kv); - error = "opal pmix put proc sessiondir"; - goto error; - } - OBJ_DESTRUCT(&kv); } /* Setup the communication infrastructure */ /* diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index baa606f770..da598de764 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -237,10 +237,7 @@ int orte_ess_base_orted_setup(char **hosts) /* take a pass thru the session directory code to fillin the * tmpdir names - don't create anything yet */ - if (ORTE_SUCCESS != (ret = orte_session_dir(false, - orte_process_info.tmpdir_base, - orte_process_info.nodename, - ORTE_PROC_MY_NAME))) { + if (ORTE_SUCCESS != (ret = orte_session_dir(false, ORTE_PROC_MY_NAME))) { ORTE_ERROR_LOG(ret); error = "orte_session_dir define"; goto error; @@ -250,10 +247,7 @@ int orte_ess_base_orted_setup(char **hosts) */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); /* now actually create the directory tree */ - if (ORTE_SUCCESS != (ret = orte_session_dir(true, - orte_process_info.tmpdir_base, - orte_process_info.nodename, - ORTE_PROC_MY_NAME))) { + if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) { ORTE_ERROR_LOG(ret); error 
= "orte_session_dir"; goto error; @@ -277,11 +271,8 @@ int orte_ess_base_orted_setup(char **hosts) /* define a log file name in the session directory */ snprintf(log_file, PATH_MAX, "output-orted-%s-%s.log", jobidstring, orte_process_info.nodename); - log_path = opal_os_path(false, - orte_process_info.tmpdir_base, - orte_process_info.top_session_dir, - log_file, - NULL); + log_path = opal_os_path(false, orte_process_info.top_session_dir, + log_file, NULL); fd = open(log_path, O_RDWR|O_CREAT|O_TRUNC, 0640); if (fd < 0) { diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c index b92fd4693e..48ff8f26d6 100644 --- a/orte/mca/ess/base/ess_base_std_tool.c +++ b/orte/mca/ess/base/ess_base_std_tool.c @@ -145,10 +145,9 @@ int orte_ess_base_tool_setup(void) * tmp base where any other session directories on * this node might be located */ - if (ORTE_SUCCESS != (ret = orte_session_dir_get_name(NULL, - &orte_process_info.tmpdir_base, - &orte_process_info.top_session_dir, - orte_process_info.nodename, NULL))) { + + ret = orte_session_setup_base(NULL); + if (ORTE_SUCCESS != ret ) { ORTE_ERROR_LOG(ret); error = "define session dir names"; goto error; diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index c7984a16ec..2f9368f9dc 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -138,7 +138,7 @@ static int rte_init(void) { int ret; char *error = NULL; - char *contact_path, *jobfam_dir; + char *contact_path; orte_job_t *jdata; orte_node_t *node; orte_proc_t *proc; @@ -294,10 +294,7 @@ static int rte_init(void) /* take a pass thru the session directory code to fillin the * tmpdir names - don't create anything yet */ - if (ORTE_SUCCESS != (ret = orte_session_dir(false, - orte_process_info.tmpdir_base, - orte_process_info.nodename, - ORTE_PROC_MY_NAME))) { + if (ORTE_SUCCESS != (ret = orte_session_dir(false, ORTE_PROC_MY_NAME))) { error = "orte_session_dir define"; goto error; 
} @@ -307,10 +304,7 @@ static int rte_init(void) orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); /* now actually create the directory tree */ - if (ORTE_SUCCESS != (ret = orte_session_dir(true, - orte_process_info.tmpdir_base, - orte_process_info.nodename, - ORTE_PROC_MY_NAME))) { + if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) { error = "orte_session_dir"; goto error; } @@ -586,9 +580,12 @@ static int rte_init(void) opal_output_set_output_file_info(orte_process_info.proc_session_dir, "output-", NULL, NULL); /* save my contact info in a file for others to find */ - jobfam_dir = opal_dirname(orte_process_info.job_session_dir); - contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL); - free(jobfam_dir); + if( NULL == orte_process_info.jobfam_session_dir ){ + /* has to be set here! */ + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + goto error; + } + contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir, "contact.txt", NULL); OPAL_OUTPUT_VERBOSE((2, orte_debug_output, "%s writing contact file %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -758,10 +755,9 @@ static int rte_init(void) true, error, ORTE_ERROR_NAME(ret), ret); } /* remove my contact info file, if we have session directories */ - if (NULL != orte_process_info.job_session_dir) { - jobfam_dir = opal_dirname(orte_process_info.job_session_dir); - contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL); - free(jobfam_dir); + if (NULL != orte_process_info.jobfam_session_dir) { + contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir, + "contact.txt", NULL); unlink(contact_path); free(contact_path); } @@ -775,7 +771,6 @@ static int rte_init(void) static int rte_finalize(void) { char *contact_path; - char *jobfam_dir; if (signals_set) { /* Remove the epipe handler */ @@ -816,10 +811,9 @@ static int rte_finalize(void) (void) mca_base_framework_close(&opal_pstat_base_framework); /* remove my contact info file, if we have session directories */ - 
if (NULL != orte_process_info.job_session_dir) { - jobfam_dir = opal_dirname(orte_process_info.job_session_dir); - contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL); - free(jobfam_dir); + if (NULL != orte_process_info.jobfam_session_dir) { + contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir, + "contact.txt", NULL); unlink(contact_path); free(contact_path); } diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index ca3b63d724..a2ee833b30 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -242,6 +242,43 @@ static int rte_init(void) free(string_key); } + /* retrieve temp directories info */ + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TMPDIR, &wildcard_rank, &val, OPAL_STRING); + if (OPAL_SUCCESS == ret && NULL != val) { + /* TODO: who has precedence - pmix of MCA setting??? */ + if( NULL == orte_process_info.top_session_dir ){ + orte_process_info.top_session_dir = val; + } else { + /* keep the MCA setting */ + free(val); + } + val = NULL; + } + + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_NSDIR, &wildcard_rank, &val, OPAL_STRING); + if (OPAL_SUCCESS == ret && NULL != val) { + /* TODO: who has precedence - pmix of MCA setting??? */ + if( NULL == orte_process_info.job_session_dir ){ + orte_process_info.job_session_dir = val; + } else { + /* keep the MCA setting */ + free(val); + } + val = NULL; + } + + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_PROCDIR, &wildcard_rank, &val, OPAL_STRING); + if (OPAL_SUCCESS == ret && NULL != val) { + /* TODO: who has precedence - pmix of MCA setting??? 
*/ + if( NULL == orte_process_info.proc_session_dir ){ + orte_process_info.proc_session_dir = val; + } else { + /* keep the MCA setting */ + free(val); + } + val = NULL; + } + /* retrieve our topology */ val = NULL; OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO, diff --git a/orte/mca/filem/raw/filem_raw_module.c b/orte/mca/filem/raw/filem_raw_module.c index 3070bc6987..5c6dc34b72 100644 --- a/orte/mca/filem/raw/filem_raw_module.c +++ b/orte/mca/filem/raw/filem_raw_module.c @@ -105,6 +105,17 @@ static void recv_ack(int status, orte_process_name_t* sender, void* cbdata); static void write_handler(int fd, short event, void *cbdata); +static char *filem_session_dir() +{ + char *session_dir = orte_process_info.jobfam_session_dir; + if( NULL == session_dir ){ + /* if no job family session dir was provided - + * use the job session dir */ + session_dir = orte_process_info.job_session_dir; + } + return session_dir; +} + static int raw_init(void) { OBJ_CONSTRUCT(&incoming_files, opal_list_t); @@ -657,25 +668,26 @@ static int create_link(char *my_dir, char *path, static int raw_link_local_files(orte_job_t *jdata, orte_app_context_t *app) { - char *my_dir, *path=NULL; + char *session_dir, *path=NULL; orte_proc_t *proc; - char *prefix; int i, j, rc; orte_filem_raw_incoming_t *inbnd; opal_list_item_t *item; char **files=NULL, *bname, *filestring; - /* check my session directory for files I have received and + /* check my jobfam session directory for files I have received and * symlink them to the proc-level session directory of each * local process in the job + * + * TODO: @rhc - please check that I've correctly interpret your + * intention here */ - my_dir = opal_dirname(orte_process_info.job_session_dir); - - /* setup */ - if (NULL != orte_process_info.tmpdir_base) { - prefix = strdup(orte_process_info.tmpdir_base); - } else { - prefix = NULL; + session_dir = filem_session_dir(); + if( NULL == session_dir){ + /* we were unable to find any suitable directory */ + rc 
= ORTE_ERR_BAD_PARAM; + ORTE_ERROR_LOG(rc); + return rc; } /* get the list of files this app wants */ @@ -692,10 +704,6 @@ static int raw_link_local_files(orte_job_t *jdata, /* if there are no files to link, then ignore this */ if (NULL == files) { - free(my_dir); - if (NULL != prefix) { - free(prefix); - } return ORTE_SUCCESS; } @@ -736,10 +744,8 @@ static int raw_link_local_files(orte_job_t *jdata, ORTE_NAME_PRINT(&proc->name))); /* get the session dir name in absolute form */ - path = NULL; - rc = orte_session_dir_get_name(&path, &prefix, NULL, - orte_process_info.nodename, - &proc->name); + path = orte_process_info.proc_session_dir; + /* create it, if it doesn't already exist */ if (OPAL_SUCCESS != (rc = opal_os_dirpath_create(path, S_IRWXU))) { ORTE_ERROR_LOG(rc); @@ -747,11 +753,6 @@ static int raw_link_local_files(orte_job_t *jdata, * create it - either way, we are done */ free(files); - if (NULL != prefix) { - free(prefix); - } - free(path); - free(my_dir); return rc; } @@ -775,13 +776,8 @@ static int raw_link_local_files(orte_job_t *jdata, inbnd->file)); /* cycle thru the link points and create symlinks to them */ for (j=0; NULL != inbnd->link_pts[j]; j++) { - if (ORTE_SUCCESS != (rc = create_link(my_dir, path, inbnd->link_pts[j]))) { + if (ORTE_SUCCESS != (rc = create_link(session_dir, path, inbnd->link_pts[j]))) { ORTE_ERROR_LOG(rc); - free(my_dir); - free(path); - if (NULL != prefix) { - free(prefix); - } free(files); return rc; } @@ -796,13 +792,8 @@ static int raw_link_local_files(orte_job_t *jdata, } } } - free(path); } opal_argv_free(files); - if (NULL != prefix) { - free(prefix); - } - free(my_dir); return ORTE_SUCCESS; } @@ -999,7 +990,7 @@ static void recv_files(int status, orte_process_name_t* sender, opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { - char *file, *jobfam_dir; + char *file, *session_dir; int32_t nchunk, n, nbytes; unsigned char data[ORTE_FILEM_RAW_CHUNK_MAX]; int rc; @@ -1086,9 +1077,9 @@ static void recv_files(int 
status, orte_process_name_t* sender, incoming->top = strdup(tmp); free(tmp); /* define the full path to where we will put it */ - jobfam_dir = opal_dirname(orte_process_info.job_session_dir); - incoming->fullpath = opal_os_path(false, jobfam_dir, file, NULL); - free(jobfam_dir); + session_dir = filem_session_dir(); + + incoming->fullpath = opal_os_path(false, session_dir, file, NULL); OPAL_OUTPUT_VERBOSE((1, orte_filem_base_framework.framework_output, "%s filem:raw: opening target file %s", diff --git a/orte/mca/oob/usock/oob_usock_component.c b/orte/mca/oob/usock/oob_usock_component.c index 4d9159a5a6..f8969409a7 100644 --- a/orte/mca/oob/usock/oob_usock_component.c +++ b/orte/mca/oob/usock/oob_usock_component.c @@ -164,8 +164,7 @@ static int component_available(void) /* if session directories were forbidden, then we cannot be used */ if (!orte_create_session_dirs || - NULL == orte_process_info.tmpdir_base || - NULL == orte_process_info.top_session_dir) { + NULL == orte_process_info.jobfam_session_dir ) { return ORTE_ERR_NOT_SUPPORTED; } @@ -216,9 +215,7 @@ static int component_startup(void) /* setup the path to the daemon rendezvous point */ memset(&mca_oob_usock_component.address, 0, sizeof(struct sockaddr_un)); mca_oob_usock_component.address.sun_family = AF_UNIX; - session = opal_os_path(false, orte_process_info.tmpdir_base, - orte_process_info.top_session_dir, - orte_process_info.jobfam_session_dir, + session = opal_os_path(false, orte_process_info.jobfam_session_dir, "usock", NULL); if ((strlen(session) + 1) > sizeof(mca_oob_usock_component.address.sun_path)-1) { opal_output(0, "SESSION DIR TOO LONG"); diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index cb0773d545..bd53a2cddb 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -943,6 +943,7 @@ static int setup_fork(orte_job_t *jdata, /* forcibly set the local tmpdir base and top session dir to match ours */ 
opal_setenv("OMPI_MCA_orte_tmpdir_base", orte_process_info.tmpdir_base, true, &app->env); + /* TODO: should we use PMIx key to pass this data? */ opal_setenv("OMPI_MCA_orte_top_session_dir", orte_process_info.top_session_dir, true, &app->env); opal_setenv("OMPI_MCA_orte_jobfam_session_dir", orte_process_info.jobfam_session_dir, true, &app->env); @@ -1102,24 +1103,8 @@ static int setup_child(orte_job_t *jdata, ORTE_FLAG_SET(child, ORTE_PROC_FLAG_IOF_COMPLETE); } - /* construct the proc's session dir name */ - if (NULL != orte_process_info.tmpdir_base) { - value = strdup(orte_process_info.tmpdir_base); - } else { - value = NULL; - } - param = NULL; - if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(&param, &value, NULL, - orte_process_info.nodename, - &child->name))) { - ORTE_ERROR_LOG(rc); - if (NULL != value) { - free(value); - } - return rc; - } - free(value); /* pass an envar so the proc can find any files it had prepositioned */ + param = orte_process_info.proc_session_dir; opal_setenv("OMPI_FILE_LOCATION", param, true, &app->env); /* if the user wanted the cwd to be the proc's session dir, then @@ -1132,12 +1117,10 @@ static int setup_child(orte_job_t *jdata, /* doesn't exist with correct permissions, and/or we can't * create it - either way, we are done */ - free(param); return rc; } /* change to it */ if (0 != chdir(param)) { - free(param); return ORTE_ERROR; } /* It seems that chdir doesn't @@ -1154,6 +1137,5 @@ static int setup_child(orte_job_t *jdata, /* update the initial wdir value too */ opal_setenv("OMPI_MCA_initial_wdir", param, true, &app->env); } - free(param); return ORTE_SUCCESS; } diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index f3fa426db8..f48ef950c1 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -2132,7 +2132,8 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata) /* create the attachment FIFO and setup readevent - cannot be * done if no session dirs exist! 
*/ - attach_fifo = opal_os_path(false, orte_process_info.job_session_dir, "debugger_attach_fifo", NULL); + attach_fifo = opal_os_path(false, orte_process_info.job_session_dir, + "debugger_attach_fifo", NULL); if ((mkfifo(attach_fifo, FILE_MODE) < 0) && errno != EEXIST) { opal_output(0, "CANNOT CREATE FIFO %s: errno %d", attach_fifo, errno); free(attach_fifo); diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index 7073900be7..f374a3c80f 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -262,9 +262,7 @@ int pmix_server_init(void) kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_SERVER_TMPDIR); kv->type = OPAL_STRING; - kv->data.string = opal_os_path(false, orte_process_info.tmpdir_base, - orte_process_info.top_session_dir, - orte_process_info.jobfam_session_dir, NULL); + kv->data.string = opal_os_path(false, orte_process_info.jobfam_session_dir, NULL); opal_list_append(&info, &kv->super); /* use the same for the system temp directory - this is * where the system-level tool connections will go */ diff --git a/orte/runtime/orte_cr.c b/orte/runtime/orte_cr.c index 27cff49321..b203c1980d 100644 --- a/orte/runtime/orte_cr.c +++ b/orte/runtime/orte_cr.c @@ -310,7 +310,7 @@ static int orte_cr_coord_post_restart(void) { * Add the previous session directory for cleanup */ opal_crs_base_cleanup_append(orte_process_info.job_session_dir, true); - tmp_dir = opal_dirname(orte_process_info.job_session_dir); + tmp_dir = (NULL == orte_process_info.jobfam_session_dir) ? NULL : strdup(orte_process_info.jobfam_session_dir); /* private copy: freed just below, must not alias the global */ if( NULL != tmp_dir ) { opal_crs_base_cleanup_append(tmp_dir, true); free(tmp_dir); diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 2b6f015df6..27e4778e8f 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -160,7 +160,7 @@ int orte_register_params(void) &orte_top_session_dir); if (NULL != orte_top_session_dir) { - if (NULL != orte_process_info.top_session_dir) { + if (NULL != 
orte_process_info.top_session_dir) { free(orte_process_info.top_session_dir); } orte_process_info.top_session_dir = strdup(orte_top_session_dir); diff --git a/orte/util/hnp_contact.c b/orte/util/hnp_contact.c index 5436cd4e39..71166412d0 100644 --- a/orte/util/hnp_contact.c +++ b/orte/util/hnp_contact.c @@ -182,7 +182,7 @@ int orte_list_local_hnps(opal_list_t *hnps, bool connect) /* * Check to make sure we have access to the top-level directory */ - headdir = opal_os_path(false, orte_process_info.tmpdir_base, orte_process_info.top_session_dir, NULL); + headdir = orte_process_info.top_session_dir; if( ORTE_SUCCESS != (ret = opal_os_dirpath_access(headdir, 0) )) { /* it is okay not to find this as there may not be any @@ -231,7 +231,6 @@ int orte_list_local_hnps(opal_list_t *hnps, bool connect) cleanup: if( NULL != cur_dirp ) closedir(cur_dirp); - free(headdir); return (opal_list_is_empty(hnps) ? ORTE_ERR_NOT_FOUND : ORTE_SUCCESS); } diff --git a/orte/util/proc_info.c b/orte/util/proc_info.c index 48961ff48a..1124787886 100644 --- a/orte/util/proc_info.c +++ b/orte/util/proc_info.c @@ -108,6 +108,14 @@ int orte_proc_info(void) if (init) { return ORTE_SUCCESS; } + + { + int delay = 0; + while( delay ){ + sleep(1); + } + } + init = true; OBJ_CONSTRUCT(&orte_process_info.super, opal_proc_t); diff --git a/orte/util/session_dir.c b/orte/util/session_dir.c index cebbc700ed..574fa3b42a 100644 --- a/orte/util/session_dir.c +++ b/orte/util/session_dir.c @@ -73,10 +73,6 @@ static int orte_create_dir(char *directory); static bool orte_dir_check_file(const char *root, const char *path); -static char *orte_build_job_session_dir(char *top_dir, - orte_process_name_t *proc, - orte_jobid_t jobid); - #define OMPI_PRINTF_FIX_STRING(a) ((NULL == a) ? 
"(null)" : a) /**************************** @@ -112,175 +108,186 @@ static int orte_create_dir(char *directory) return ret; } -/* - * Construct the fullpath to the session directory - it - * will consist of "ompi..", and - * have subdirs: - * - * pid - the pid of the mpirun that oversees this job. Note - * that direct-launched processes will have manufactured - * this value - * - * jobid - jobid of the application being executed - * - * vpid - vpid of the process - */ -int -orte_session_dir_get_name(char **fulldirpath, - char **return_prefix, /* This will come back as the valid tmp dir */ - char **return_frontend, - char *hostid, - orte_process_name_t *proc) { - char *hostname = NULL, - *sessions = NULL, - *prefix = NULL, - *frontend = NULL, - *jobfam = NULL, - *job = NULL, - *vpidstr = NULL; - bool prefix_provided = false; - int exit_status = ORTE_SUCCESS; - size_t len; - uid_t uid; + +static int _setup_tmpdir_base() +{ + int rc = ORTE_SUCCESS; + + /* make sure that we have tmpdir_base set + * if we need it + */ + if (NULL == orte_process_info.tmpdir_base) { + orte_process_info.tmpdir_base = + strdup(opal_tmp_directory()); + if (NULL == orte_process_info.tmpdir_base) { + rc = ORTE_ERR_OUT_OF_RESOURCE; + goto exit; + } + } +exit: + if( ORTE_SUCCESS != rc ){ + ORTE_ERROR_LOG(rc); + } + return rc; +} + +static int _setup_top_session_dir() +{ + int rc = ORTE_SUCCESS; + /* get the effective uid */ + uid_t uid = geteuid(); + + /* construct the top_session_dir if we need */ + if (NULL == orte_process_info.top_session_dir) { + if (ORTE_SUCCESS != (rc = _setup_tmpdir_base())) { + return rc; + } + if( NULL == orte_process_info.nodename || + NULL == orte_process_info.tmpdir_base ){ + /* we can't setup top session dir */ + rc = ORTE_ERR_BAD_PARAM; + goto exit; + } + + if (0 > asprintf(&orte_process_info.top_session_dir, + "%s/ompi.%s.%lu", orte_process_info.tmpdir_base, + orte_process_info.nodename, (unsigned long)uid)) { + orte_process_info.top_session_dir = NULL; + rc = 
ORTE_ERR_OUT_OF_RESOURCE; + goto exit; + } + } +exit: + if( ORTE_SUCCESS != rc ){ + ORTE_ERROR_LOG(rc); + } + return rc; +} + +static int _setup_jobfam_session_dir(orte_process_name_t *proc) +{ + int rc = ORTE_SUCCESS; + + /* construct the jobfam_session_dir if it was not provided (proc may be NULL) */ + if (NULL == orte_process_info.jobfam_session_dir) { + if (ORTE_SUCCESS != (rc = _setup_top_session_dir())) { + return rc; + } + + if (ORTE_PROC_IS_HNP) { + if (0 > asprintf(&orte_process_info.jobfam_session_dir, + "%s/pid.%lu", orte_process_info.top_session_dir, + (unsigned long)orte_process_info.pid) ) { + rc = ORTE_ERR_OUT_OF_RESOURCE; + goto exit; + } + } else { + /* we were not given one, so define it */ + if (NULL == proc || (ORTE_JOBID_INVALID == proc->jobid) ) { + if (0 > asprintf(&orte_process_info.jobfam_session_dir, + "%s/jobfam", orte_process_info.top_session_dir) ) { + rc = ORTE_ERR_OUT_OF_RESOURCE; + goto exit; + } + } else { + if (0 > asprintf(&orte_process_info.jobfam_session_dir, + "%s/jf.%d", orte_process_info.top_session_dir, + ORTE_JOB_FAMILY(proc->jobid))) { + orte_process_info.jobfam_session_dir = NULL; + rc = ORTE_ERR_OUT_OF_RESOURCE; + goto exit; + } + } + } + } +exit: + if( ORTE_SUCCESS != rc ){ + ORTE_ERROR_LOG(rc); + } + return rc; +} + +static int +_setup_job_session_dir(orte_process_name_t *proc) +{ + int rc = ORTE_SUCCESS; + + /* construct the job_session_dir if it was not provided; a NULL proc leaves it unset */ + if( NULL == orte_process_info.job_session_dir ){ + if( ORTE_SUCCESS != (rc = _setup_jobfam_session_dir(proc)) ){ + return rc; + } + if (NULL != proc && ORTE_JOBID_INVALID != proc->jobid) { + if (0 > asprintf(&orte_process_info.job_session_dir, + "%s/%d", orte_process_info.jobfam_session_dir, + ORTE_LOCAL_JOBID(proc->jobid))) { + orte_process_info.job_session_dir = NULL; + rc = ORTE_ERR_OUT_OF_RESOURCE; + goto exit; + } + } else { + orte_process_info.job_session_dir = NULL; + } + } + +exit: + if( ORTE_SUCCESS != rc ){ + ORTE_ERROR_LOG(rc); + } + return rc; +} + +static int 
+_setup_proc_session_dir(orte_process_name_t *proc) +{ + int rc = ORTE_SUCCESS; + + /* construct the proc_session_dir if it was not provided; a NULL proc leaves it unset */ + if( NULL == orte_process_info.proc_session_dir ){ + if( ORTE_SUCCESS != (rc = _setup_job_session_dir(proc)) ){ + return rc; + } + if (NULL != proc && ORTE_VPID_INVALID != proc->vpid) { + if (0 > asprintf(&orte_process_info.proc_session_dir, + "%s/%d", orte_process_info.job_session_dir, + proc->vpid)) { + orte_process_info.proc_session_dir = NULL; + rc = ORTE_ERR_OUT_OF_RESOURCE; + goto exit; + } + } else { + orte_process_info.proc_session_dir = NULL; + } + } + +exit: + if( ORTE_SUCCESS != rc ){ + ORTE_ERROR_LOG(rc); + } + return rc; +} + +int orte_session_setup_base(orte_process_name_t *proc) +{ + int rc; /* Ensure that system info is set */ orte_proc_info(); - /* get the effective uid */ - uid = geteuid(); - - /* - * set the 'hostname' - */ - if( NULL != hostid) { /* User specified version */ - hostname = strdup(hostid); - } - else { /* check if it is set elsewhere */ - if( NULL != orte_process_info.nodename) - hostname = strdup(orte_process_info.nodename); - else { - /* Couldn't find it, so fail */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - exit_status = ORTE_ERR_BAD_PARAM; - goto cleanup; - } + /* setup job and proc session directories */ + if( ORTE_SUCCESS != (rc = _setup_job_session_dir(proc)) ){ + return rc; } - /* construct the frontend of the session directory*/ - if (NULL != orte_process_info.top_session_dir) { - frontend = strdup(orte_process_info.top_session_dir); - } else { /* If not set then construct it */ - if (0 > asprintf(&frontend, "ompi.%s.%lu", hostname, (unsigned long)uid)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - exit_status = ORTE_ERR_OUT_OF_RESOURCE; - goto cleanup; - } - } - - /* construct the next level down, which belongs to the - * job family. 
This is related to the mpirun that launched - * the job, or is an arbitrary (agreed upon) value if - * direct launched */ - if (ORTE_PROC_IS_HNP) { - if (0 > asprintf(&jobfam, "pid.%lu", (unsigned long)orte_process_info.pid)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - exit_status = ORTE_ERR_OUT_OF_RESOURCE; - goto cleanup; - } - orte_process_info.jobfam_session_dir = strdup(jobfam); - } else if (NULL != orte_process_info.jobfam_session_dir) { - /* we had a job family session dir passed down to us by mpirun */ - jobfam = strdup(orte_process_info.jobfam_session_dir); - } else { - /* we were not given one, so define it */ - if (NULL == proc) { - jobfam = strdup("jobfam"); - } else { - if (0 > asprintf(&jobfam, "jf.%d", ORTE_JOB_FAMILY(proc->jobid))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - exit_status = ORTE_ERR_OUT_OF_RESOURCE; - goto cleanup; - } - } - orte_process_info.jobfam_session_dir = strdup(jobfam); - } - - /* - * Construct the session directory - */ - /* If we were given a valid vpid then we can construct it fully */ - if( NULL != proc) { - if (ORTE_VPID_INVALID != proc->vpid) { - if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - exit_status = ORTE_ERR_OUT_OF_RESOURCE; - goto cleanup; - } - - if (ORTE_SUCCESS != orte_util_convert_vpid_to_string(&vpidstr, proc->vpid)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - exit_status = ORTE_ERR_OUT_OF_RESOURCE; - goto cleanup; - } - - sessions = opal_os_path(false, frontend, jobfam, job, vpidstr, NULL); - if( NULL == sessions ) { - ORTE_ERROR_LOG(ORTE_ERROR); - exit_status = ORTE_ERROR; - goto cleanup; - } - } else if (ORTE_JOBID_INVALID != proc->jobid) { - if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - exit_status = ORTE_ERR_OUT_OF_RESOURCE; - goto cleanup; - } - - sessions = opal_os_path( false, frontend, jobfam, job, NULL ); - if( NULL == sessions ) { - ORTE_ERROR_LOG(ORTE_ERROR); - 
exit_status = ORTE_ERROR; - goto cleanup; - } - } else { - sessions = strdup(frontend); /* must dup this to avoid double-free later */ - } - - } else { - /* If we were not given a proc at all, then we just set it to frontend */ - sessions = strdup(frontend); /* must dup this to avoid double-free later */ - } - - /* - * If the user specified an invalid prefix, or no prefix at all - * we need to keep looking - */ - if( NULL != fulldirpath && NULL != *fulldirpath) { - free(*fulldirpath); - *fulldirpath = NULL; - } - - if( NULL != return_prefix && NULL != *return_prefix) { /* use the user specified one, if available */ - prefix = strdup(*return_prefix); - prefix_provided = true; - } - /* Try to find a proper alternative prefix */ - else if (NULL != orte_process_info.tmpdir_base) { /* stored value */ - prefix = strdup(orte_process_info.tmpdir_base); - } - else { /* General Environment var */ - prefix = strdup(opal_tmp_directory()); - } - len = strlen(prefix); - /* check for a trailing path separator */ - if (OPAL_PATH_SEP[0] == prefix[len-1]) { - prefix[len-1] = '\0'; + if( ORTE_SUCCESS != (rc = _setup_proc_session_dir(proc)) ){ + return rc; } /* BEFORE doing anything else, check to see if this prefix is * allowed by the system */ - if (NULL != orte_prohibited_session_dirs) { + if (NULL != orte_prohibited_session_dirs && + NULL != orte_process_info.tmpdir_base ) { char **list; int i, len; /* break the string into tokens - it should be @@ -291,97 +298,36 @@ orte_session_dir_get_name(char **fulldirpath, /* cycle through the list */ for (i=0; i < len; i++) { /* check if prefix matches */ - if (0 == strncmp(prefix, list[i], strlen(list[i]))) { + if (0 == strncmp(orte_process_info.tmpdir_base, list[i], strlen(list[i]))) { /* this is a prohibited location */ orte_show_help("help-orte-runtime.txt", "orte:session:dir:prohibited", - true, prefix, orte_prohibited_session_dirs); + true, orte_process_info.tmpdir_base, + orte_prohibited_session_dirs); opal_argv_free(list); - 
free(prefix); - free(sessions); - free(hostname); - free(frontend); return ORTE_ERR_FATAL; } } opal_argv_free(list); /* done with this */ } - /* - * Construct the absolute final path, if requested - */ - if (NULL != fulldirpath) { - *fulldirpath = opal_os_path(false, prefix, sessions, NULL); - } - - /* - * Return the frontend and prefix, if user requested we do so - */ - if (NULL != return_frontend) { - *return_frontend = strdup(frontend); - } - if (!prefix_provided && NULL != return_prefix) { - *return_prefix = strdup(prefix); - } - - cleanup: - if(NULL != hostname) { - free(hostname); - } - if(NULL != sessions) { - free(sessions); - } - if (NULL != prefix) { - free(prefix); - } - if (NULL != frontend) { - free(frontend); - } - if (NULL != jobfam) { - free(jobfam); - } - if (NULL != job) { - free(job); - } - if (NULL != vpidstr) { - free(vpidstr); - } - - return exit_status; + return ORTE_SUCCESS; } /* * Construct the session directory and create it if necessary */ -int orte_session_dir(bool create, - char *prefix, char *hostid, - orte_process_name_t *proc) +int orte_session_dir(bool create, orte_process_name_t *proc) { - char *fulldirpath = NULL, - *frontend = NULL, - *sav = NULL; int rc = ORTE_SUCCESS; - char *local_prefix = NULL; - - /* use the specified prefix, if one was given */ - if (NULL != prefix) { - local_prefix = strdup(prefix); - } /* * Get the session directory full name */ - if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(&fulldirpath, - &local_prefix, - &frontend, - hostid, - proc))) { + if (ORTE_SUCCESS != (rc = orte_session_setup_base(proc))) { if (ORTE_ERR_FATAL == rc) { /* this indicates we should abort quietly */ rc = ORTE_ERR_SILENT; - goto cleanup; } - /* otherwise, bark a little first */ - ORTE_ERROR_LOG(rc); goto cleanup; } @@ -389,73 +335,26 @@ int orte_session_dir(bool create, * Now that we have the full path, go ahead and create it if necessary */ if( create ) { - if( ORTE_SUCCESS != (rc = orte_create_dir(fulldirpath) ) ) { + if( 
ORTE_SUCCESS != (rc = orte_create_dir(orte_process_info.proc_session_dir)) ) { ORTE_ERROR_LOG(rc); goto cleanup; } } - /* update global structure fields */ - if (NULL != orte_process_info.tmpdir_base) { - free(orte_process_info.tmpdir_base); - } - orte_process_info.tmpdir_base = strdup(local_prefix); - if (NULL != orte_process_info.top_session_dir) { - free(orte_process_info.top_session_dir); - orte_process_info.top_session_dir = NULL; - } - if (NULL != frontend) { - orte_process_info.top_session_dir = strdup(frontend); - } - - /* - * Set the process session directory - */ - if (ORTE_VPID_INVALID != proc->vpid) { - if (NULL != orte_process_info.proc_session_dir) { - free(orte_process_info.proc_session_dir); - } - orte_process_info.proc_session_dir = strdup(fulldirpath); - - /* Strip off last part of directory structure */ - sav = opal_dirname(fulldirpath); - free(fulldirpath); - fulldirpath = sav; - sav = NULL; - } - - /* - * Set the job session directory - */ - if (ORTE_JOBID_INVALID != proc->jobid) { - if (NULL != orte_process_info.job_session_dir) { - free(orte_process_info.job_session_dir); - } - orte_process_info.job_session_dir = strdup(fulldirpath); - } - if (orte_debug_flag) { opal_output(0, "procdir: %s", OMPI_PRINTF_FIX_STRING(orte_process_info.proc_session_dir)); opal_output(0, "jobdir: %s", OMPI_PRINTF_FIX_STRING(orte_process_info.job_session_dir)); - opal_output(0, "top: %s", + opal_output(0, "top: %s", + OMPI_PRINTF_FIX_STRING(orte_process_info.jobfam_session_dir)); + opal_output(0, "top: %s", OMPI_PRINTF_FIX_STRING(orte_process_info.top_session_dir)); opal_output(0, "tmp: %s", OMPI_PRINTF_FIX_STRING(orte_process_info.tmpdir_base)); } cleanup: - if (NULL != local_prefix) { - free(local_prefix); - } - if(NULL != fulldirpath) { - free(fulldirpath); - } - if(NULL != frontend) { - free(frontend); - } - return rc; } @@ -466,16 +365,14 @@ int orte_session_dir_cleanup(orte_jobid_t jobid) { int rc = ORTE_SUCCESS; - char *tmp = NULL; - char 
*job_session_dir=NULL; - if (!orte_create_session_dirs) { - /* didn't create them */ + if (!orte_create_session_dirs ) { + /* we haven't created them */ return ORTE_SUCCESS; } - if (NULL == orte_process_info.tmpdir_base && - NULL == orte_process_info.top_session_dir) { + if (NULL == orte_process_info.job_session_dir || + NULL == orte_process_info.proc_session_dir) { /* this should never happen - it means we are calling * cleanup *before* properly setting up the session * dir system. This leaves open the possibility of @@ -486,37 +383,30 @@ orte_session_dir_cleanup(orte_jobid_t jobid) goto CLEANUP; } - /* need to setup the top_session_dir with the prefix */ - tmp = opal_os_path(false, - orte_process_info.tmpdir_base, - orte_process_info.top_session_dir, NULL); - - /* we can only blow away session directories for our job family */ - job_session_dir = orte_build_job_session_dir(tmp, ORTE_PROC_MY_NAME, jobid); - if (NULL == job_session_dir) { - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - /* recursively blow the whole session away for our job family, * saving only output files */ - opal_os_dirpath_destroy(job_session_dir, true, orte_dir_check_file); + opal_os_dirpath_destroy(orte_process_info.job_session_dir, + true, orte_dir_check_file); /* now attempt to eliminate the top level directory itself - this * will fail if anything is present, but ensures we cleanup if * we are the last one out */ - opal_os_dirpath_destroy(tmp, false, orte_dir_check_file); + if( NULL != orte_process_info.top_session_dir ){ + opal_os_dirpath_destroy(orte_process_info.top_session_dir, + false, orte_dir_check_file); + } - if (NULL != job_session_dir && opal_os_dirpath_is_empty(job_session_dir)) { + if (opal_os_dirpath_is_empty(orte_process_info.job_session_dir)) { if (orte_debug_flag) { opal_output(0, "sess_dir_cleanup: found job session dir empty - deleting"); } - rmdir(job_session_dir); + rmdir(orte_process_info.job_session_dir); } else { if (orte_debug_flag) { - if 
(OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(job_session_dir, 0)) { + if (OPAL_ERR_NOT_FOUND == + opal_os_dirpath_access(orte_process_info.job_session_dir, 0)) { opal_output(0, "sess_dir_cleanup: job session dir does not exist"); } else { opal_output(0, "sess_dir_cleanup: job session dir not empty - leaving"); @@ -525,24 +415,27 @@ orte_session_dir_cleanup(orte_jobid_t jobid) goto CLEANUP; } - if (opal_os_dirpath_is_empty(tmp)) { - if (orte_debug_flag) { - opal_output(0, "sess_dir_cleanup: found top session dir empty - deleting"); - } - rmdir(tmp); - } else { - if (orte_debug_flag) { - if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(tmp, 0)) { - opal_output(0, "sess_dir_cleanup: top session dir does not exist"); - } else { - opal_output(0, "sess_dir_cleanup: top session dir not empty - leaving"); + if ( NULL != orte_process_info.top_session_dir ){ + + if( opal_os_dirpath_is_empty(orte_process_info.top_session_dir) ) { + if (orte_debug_flag) { + opal_output(0, "sess_dir_cleanup: found top session dir empty - deleting"); } - } + rmdir(orte_process_info.top_session_dir); + } else { + if (orte_debug_flag) { + if (OPAL_ERR_NOT_FOUND == + opal_os_dirpath_access(orte_process_info.top_session_dir, 0)) { + opal_output(0, "sess_dir_cleanup: top session dir does not exist"); + } else { + opal_output(0, "sess_dir_cleanup: top session dir not empty - leaving"); + } + } + } } CLEANUP: - if (NULL != tmp) free(tmp); - if (NULL != job_session_dir) free(job_session_dir); + return rc; } @@ -554,63 +447,41 @@ orte_session_dir_finalize(orte_process_name_t *proc) char *tmp; char *job_session_dir, *vpid, *proc_session_dir; - if (!orte_create_session_dirs) { - /* didn't create them */ + if (!orte_create_session_dirs ) { + /* we haven't created them */ return ORTE_SUCCESS; } - if (NULL == orte_process_info.tmpdir_base && - NULL == orte_process_info.top_session_dir) { + if (NULL == orte_process_info.job_session_dir || + NULL == orte_process_info.proc_session_dir) { /* this should never 
happen - it means we are calling * cleanup *before* properly setting up the session - * dir system. Protect against the possibility of + * dir system. This leaves open the possibility of * accidentally removing directories we shouldn't - * touch by returning + * touch */ - return ORTE_ERR_NOT_INITIALIZED; + rc = ORTE_ERR_NOT_INITIALIZED; + goto CLEANUP; } - /* need to setup the top_session_dir with the prefix */ - tmp = opal_os_path(false, - orte_process_info.tmpdir_base, - orte_process_info.top_session_dir, NULL); - - /* define the proc and job session directories for this process */ - if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(&vpid, proc->vpid))) { - ORTE_ERROR_LOG(rc); - free(tmp); - return rc; - } - job_session_dir = orte_build_job_session_dir(tmp, proc, proc->jobid); - if( NULL == job_session_dir) { - free(tmp); - free(vpid); - return ORTE_ERR_OUT_OF_RESOURCE; - } - proc_session_dir = opal_os_path( false, job_session_dir, vpid, NULL ); - if( NULL == proc_session_dir ) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - free(tmp); - free(vpid); - free(job_session_dir); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - opal_os_dirpath_destroy(proc_session_dir, + opal_os_dirpath_destroy(orte_process_info.proc_session_dir, false, orte_dir_check_file); - opal_os_dirpath_destroy(job_session_dir, - false, orte_dir_check_file); - opal_os_dirpath_destroy(tmp, + opal_os_dirpath_destroy(orte_process_info.job_session_dir, false, orte_dir_check_file); + if( NULL != orte_process_info.top_session_dir ){ + opal_os_dirpath_destroy(orte_process_info.top_session_dir, + false, orte_dir_check_file); + } - if (opal_os_dirpath_is_empty(proc_session_dir)) { + if (opal_os_dirpath_is_empty(orte_process_info.proc_session_dir)) { if (orte_debug_flag) { opal_output(0, "sess_dir_finalize: found proc session dir empty - deleting"); } - rmdir(proc_session_dir); + rmdir(orte_process_info.proc_session_dir); } else { if (orte_debug_flag) { - if (OPAL_ERR_NOT_FOUND == 
opal_os_dirpath_access(proc_session_dir, 0)) { + if (OPAL_ERR_NOT_FOUND == + opal_os_dirpath_access(orte_process_info.proc_session_dir, 0)) { opal_output(0, "sess_dir_finalize: proc session dir does not exist"); } else { opal_output(0, "sess_dir_finalize: proc session dir not empty - leaving"); @@ -619,14 +490,15 @@ orte_session_dir_finalize(orte_process_name_t *proc) goto CLEANUP; } - if (opal_os_dirpath_is_empty(job_session_dir)) { + if (opal_os_dirpath_is_empty(orte_process_info.job_session_dir)) { if (orte_debug_flag) { opal_output(0, "sess_dir_finalize: found job session dir empty - deleting"); } - rmdir(job_session_dir); + rmdir(orte_process_info.job_session_dir); } else { if (orte_debug_flag) { - if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(job_session_dir, 0)) { + if (OPAL_ERR_NOT_FOUND == + opal_os_dirpath_access(orte_process_info.job_session_dir, 0)) { opal_output(0, "sess_dir_finalize: job session dir does not exist"); } else { opal_output(0, "sess_dir_finalize: job session dir not empty - leaving"); @@ -635,26 +507,25 @@ orte_session_dir_finalize(orte_process_name_t *proc) goto CLEANUP; } - if (opal_os_dirpath_is_empty(tmp)) { - if (orte_debug_flag) { - opal_output(0, "sess_dir_finalize: found top session dir empty - deleting"); - } - rmdir(tmp); - } else { - if (orte_debug_flag) { - if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(tmp, 0)) { - opal_output(0, "sess_dir_finalize: top session dir does not exist"); - } else { - opal_output(0, "sess_dir_finalize: top session dir not empty - leaving"); + if(NULL != orte_process_info.top_session_dir) { + if (opal_os_dirpath_is_empty(orte_process_info.top_session_dir)) { + if (orte_debug_flag) { + opal_output(0, "sess_dir_finalize: found top session dir empty - deleting"); } - } + rmdir(tmp); + } else { + if (orte_debug_flag) { + if (OPAL_ERR_NOT_FOUND == + opal_os_dirpath_access(orte_process_info.top_session_dir, 0)) { + opal_output(0, "sess_dir_finalize: top session dir does not exist"); + } else { + 
opal_output(0, "sess_dir_finalize: top session dir not empty - leaving"); + } + } + } } CLEANUP: - free(tmp); - free(vpid); - free(job_session_dir); - free(proc_session_dir); return ORTE_SUCCESS; } @@ -680,33 +551,3 @@ orte_dir_check_file(const char *root, const char *path) return true; } - -static char *orte_build_job_session_dir(char *top_dir, - orte_process_name_t *proc, - orte_jobid_t jobid) -{ - char *job_session_dir; - - if (ORTE_JOBID_WILDCARD != jobid) { - char *job = NULL; - - if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(jobid))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - job_session_dir = NULL; - goto out; - } - job_session_dir = opal_os_path(false, top_dir, orte_process_info.jobfam_session_dir, job, NULL); - free(job); - if (NULL == job_session_dir) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - } - } else { - job_session_dir = opal_os_path(false, top_dir, orte_process_info.jobfam_session_dir, NULL); - if( NULL == job_session_dir) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - } - } - -out: - return job_session_dir; -} diff --git a/orte/util/session_dir.h b/orte/util/session_dir.h index 6570b6e16b..1c77e8e698 100644 --- a/orte/util/session_dir.h +++ b/orte/util/session_dir.h @@ -99,19 +99,6 @@ BEGIN_C_DECLS * locate an already existing universe for reconnection * purposes. If set to "true", then the function * creates the directory, if possible. - * @param prefix A string variable indicating where the user - * stipulated the directory should be found or - * placed. A value of "NULL" indicates that the user - * specified no location - hence, the function explores - * a range of "standard" locations. - * @param hostid Name of the host on which the session directory is - * being built. Used to build the name of the - * "openmpi-sessions-[user]@[host]:[batch]" branch of - * the directory tree. NULL indicates that the nodename - * found in orte_process_info is to be used. - * @param batchid Batch job name, used in batch scheduling - * systems. 
NULL indicates that the default of "0" is - * to be used. * @param proc Pointer to a process name for which the session * dir name is desired * @@ -120,18 +107,13 @@ BEGIN_C_DECLS * @retval OMPI_ERROR The directory cannot be found (if create is * "false") or created (if create is "true"). */ -ORTE_DECLSPEC int orte_session_dir(bool create, char *prefix, char *hostid, - orte_process_name_t *proc); +ORTE_DECLSPEC int orte_session_dir(bool create, orte_process_name_t *proc); /* - * Construct the session directory name from the input parameters. - * This function does no checking that the directory exists, or can be used + * Setup session-related directory paths */ -ORTE_DECLSPEC int orte_session_dir_get_name(char **fulldirpath, - char **prfx, - char **frontend, - char *hostid, - orte_process_name_t *proc); +ORTE_DECLSPEC int orte_session_setup_base(orte_process_name_t *proc); + /** The orte_session_dir_finalize() function performs a cleanup of the * session directory tree. It first removes the session directory for