1
1

Several fixes related to session directories:

* enable OMPI to retrieve paths from RM through PMIx
* cleanups related to tempdirs.
Этот коммит содержится в:
Artem Polyakov 2016-09-01 16:03:10 +07:00
родитель fb51d65049
Коммит 81195ab724
16 изменённых файлов: 358 добавлений и 564 удалений

Просмотреть файл

@ -136,10 +136,7 @@ int orte_ess_base_app_setup(bool db_restrict_local)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base, (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
orte_process_info.nodename)); orte_process_info.nodename));
if (ORTE_SUCCESS != (ret = orte_session_dir(true, if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
orte_process_info.tmpdir_base,
orte_process_info.nodename,
ORTE_PROC_MY_NAME))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
error = "orte_session_dir"; error = "orte_session_dir";
goto error; goto error;
@ -149,29 +146,6 @@ int orte_ess_base_app_setup(bool db_restrict_local)
proc-specific session directory. */ proc-specific session directory. */
opal_output_set_output_file_info(orte_process_info.proc_session_dir, opal_output_set_output_file_info(orte_process_info.proc_session_dir,
"output-", NULL, NULL); "output-", NULL, NULL);
/* store the session directory location */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(OPAL_PMIX_NSDIR);
kv.type = OPAL_STRING;
kv.data.string = strdup(orte_process_info.job_session_dir);
if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &kv))) {
ORTE_ERROR_LOG(ret);
OBJ_DESTRUCT(&kv);
error = "opal pmix put job sessiondir";
goto error;
}
OBJ_DESTRUCT(&kv);
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(OPAL_PMIX_PROCDIR);
kv.type = OPAL_STRING;
kv.data.string = strdup(orte_process_info.proc_session_dir);
if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &kv))) {
ORTE_ERROR_LOG(ret);
OBJ_DESTRUCT(&kv);
error = "opal pmix put proc sessiondir";
goto error;
}
OBJ_DESTRUCT(&kv);
} }
/* Setup the communication infrastructure */ /* Setup the communication infrastructure */
/* /*

Просмотреть файл

@ -237,10 +237,7 @@ int orte_ess_base_orted_setup(char **hosts)
/* take a pass thru the session directory code to fillin the /* take a pass thru the session directory code to fillin the
* tmpdir names - don't create anything yet * tmpdir names - don't create anything yet
*/ */
if (ORTE_SUCCESS != (ret = orte_session_dir(false, if (ORTE_SUCCESS != (ret = orte_session_dir(false, ORTE_PROC_MY_NAME))) {
orte_process_info.tmpdir_base,
orte_process_info.nodename,
ORTE_PROC_MY_NAME))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
error = "orte_session_dir define"; error = "orte_session_dir define";
goto error; goto error;
@ -250,10 +247,7 @@ int orte_ess_base_orted_setup(char **hosts)
*/ */
orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
/* now actually create the directory tree */ /* now actually create the directory tree */
if (ORTE_SUCCESS != (ret = orte_session_dir(true, if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
orte_process_info.tmpdir_base,
orte_process_info.nodename,
ORTE_PROC_MY_NAME))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
error = "orte_session_dir"; error = "orte_session_dir";
goto error; goto error;
@ -277,11 +271,8 @@ int orte_ess_base_orted_setup(char **hosts)
/* define a log file name in the session directory */ /* define a log file name in the session directory */
snprintf(log_file, PATH_MAX, "output-orted-%s-%s.log", snprintf(log_file, PATH_MAX, "output-orted-%s-%s.log",
jobidstring, orte_process_info.nodename); jobidstring, orte_process_info.nodename);
log_path = opal_os_path(false, log_path = opal_os_path(false, orte_process_info.top_session_dir,
orte_process_info.tmpdir_base, log_file, NULL);
orte_process_info.top_session_dir,
log_file,
NULL);
fd = open(log_path, O_RDWR|O_CREAT|O_TRUNC, 0640); fd = open(log_path, O_RDWR|O_CREAT|O_TRUNC, 0640);
if (fd < 0) { if (fd < 0) {

Просмотреть файл

@ -145,10 +145,9 @@ int orte_ess_base_tool_setup(void)
* tmp base where any other session directories on * tmp base where any other session directories on
* this node might be located * this node might be located
*/ */
if (ORTE_SUCCESS != (ret = orte_session_dir_get_name(NULL,
&orte_process_info.tmpdir_base, ret = orte_session_setup_base(NULL);
&orte_process_info.top_session_dir, if (ORTE_SUCCESS != ret ) {
orte_process_info.nodename, NULL))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
error = "define session dir names"; error = "define session dir names";
goto error; goto error;

Просмотреть файл

@ -138,7 +138,7 @@ static int rte_init(void)
{ {
int ret; int ret;
char *error = NULL; char *error = NULL;
char *contact_path, *jobfam_dir; char *contact_path;
orte_job_t *jdata; orte_job_t *jdata;
orte_node_t *node; orte_node_t *node;
orte_proc_t *proc; orte_proc_t *proc;
@ -294,10 +294,7 @@ static int rte_init(void)
/* take a pass thru the session directory code to fillin the /* take a pass thru the session directory code to fillin the
* tmpdir names - don't create anything yet * tmpdir names - don't create anything yet
*/ */
if (ORTE_SUCCESS != (ret = orte_session_dir(false, if (ORTE_SUCCESS != (ret = orte_session_dir(false, ORTE_PROC_MY_NAME))) {
orte_process_info.tmpdir_base,
orte_process_info.nodename,
ORTE_PROC_MY_NAME))) {
error = "orte_session_dir define"; error = "orte_session_dir define";
goto error; goto error;
} }
@ -307,10 +304,7 @@ static int rte_init(void)
orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
/* now actually create the directory tree */ /* now actually create the directory tree */
if (ORTE_SUCCESS != (ret = orte_session_dir(true, if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
orte_process_info.tmpdir_base,
orte_process_info.nodename,
ORTE_PROC_MY_NAME))) {
error = "orte_session_dir"; error = "orte_session_dir";
goto error; goto error;
} }
@ -586,9 +580,12 @@ static int rte_init(void)
opal_output_set_output_file_info(orte_process_info.proc_session_dir, opal_output_set_output_file_info(orte_process_info.proc_session_dir,
"output-", NULL, NULL); "output-", NULL, NULL);
/* save my contact info in a file for others to find */ /* save my contact info in a file for others to find */
jobfam_dir = opal_dirname(orte_process_info.job_session_dir); if( NULL == orte_process_info.jobfam_session_dir ){
contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL); /* has to be set here! */
free(jobfam_dir); ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
goto error;
}
contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir, "contact.txt", NULL);
OPAL_OUTPUT_VERBOSE((2, orte_debug_output, OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
"%s writing contact file %s", "%s writing contact file %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@ -758,10 +755,9 @@ static int rte_init(void)
true, error, ORTE_ERROR_NAME(ret), ret); true, error, ORTE_ERROR_NAME(ret), ret);
} }
/* remove my contact info file, if we have session directories */ /* remove my contact info file, if we have session directories */
if (NULL != orte_process_info.job_session_dir) { if (NULL != orte_process_info.jobfam_session_dir) {
jobfam_dir = opal_dirname(orte_process_info.job_session_dir); contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir,
contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL); "contact.txt", NULL);
free(jobfam_dir);
unlink(contact_path); unlink(contact_path);
free(contact_path); free(contact_path);
} }
@ -775,7 +771,6 @@ static int rte_init(void)
static int rte_finalize(void) static int rte_finalize(void)
{ {
char *contact_path; char *contact_path;
char *jobfam_dir;
if (signals_set) { if (signals_set) {
/* Remove the epipe handler */ /* Remove the epipe handler */
@ -816,10 +811,9 @@ static int rte_finalize(void)
(void) mca_base_framework_close(&opal_pstat_base_framework); (void) mca_base_framework_close(&opal_pstat_base_framework);
/* remove my contact info file, if we have session directories */ /* remove my contact info file, if we have session directories */
if (NULL != orte_process_info.job_session_dir) { if (NULL != orte_process_info.jobfam_session_dir) {
jobfam_dir = opal_dirname(orte_process_info.job_session_dir); contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir,
contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL); "contact.txt", NULL);
free(jobfam_dir);
unlink(contact_path); unlink(contact_path);
free(contact_path); free(contact_path);
} }

Просмотреть файл

@ -242,6 +242,43 @@ static int rte_init(void)
free(string_key); free(string_key);
} }
/* retrieve temp directories info */
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TMPDIR, &wildcard_rank, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
/* TODO: who has precedence - pmix or MCA setting??? */
if( NULL == orte_process_info.top_session_dir ){
orte_process_info.top_session_dir = val;
} else {
/* keep the MCA setting */
free(val);
}
val = NULL;
}
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_NSDIR, &wildcard_rank, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
/* TODO: who has precedence - pmix or MCA setting??? */
if( NULL == orte_process_info.job_session_dir ){
orte_process_info.job_session_dir = val;
} else {
/* keep the MCA setting */
free(val);
}
val = NULL;
}
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_PROCDIR, &wildcard_rank, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
/* TODO: who has precedence - pmix or MCA setting??? */
if( NULL == orte_process_info.proc_session_dir ){
orte_process_info.proc_session_dir = val;
} else {
/* keep the MCA setting */
free(val);
}
val = NULL;
}
/* retrieve our topology */ /* retrieve our topology */
val = NULL; val = NULL;
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO, OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO,

Просмотреть файл

@ -105,6 +105,17 @@ static void recv_ack(int status, orte_process_name_t* sender,
void* cbdata); void* cbdata);
static void write_handler(int fd, short event, void *cbdata); static void write_handler(int fd, short event, void *cbdata);
/*
 * Select the directory filem/raw uses for received files: the job-family
 * session directory when it is set, otherwise the job session directory.
 *
 * The returned pointer is the one stored in orte_process_info (no copy is
 * made), so it must not be freed by the caller. It is NULL when neither
 * directory is set.
 */
static char *filem_session_dir()
{
    if (NULL != orte_process_info.jobfam_session_dir) {
        return orte_process_info.jobfam_session_dir;
    }
    /* no job family session dir was provided - fall back to the job session dir */
    return orte_process_info.job_session_dir;
}
static int raw_init(void) static int raw_init(void)
{ {
OBJ_CONSTRUCT(&incoming_files, opal_list_t); OBJ_CONSTRUCT(&incoming_files, opal_list_t);
@ -657,25 +668,26 @@ static int create_link(char *my_dir, char *path,
static int raw_link_local_files(orte_job_t *jdata, static int raw_link_local_files(orte_job_t *jdata,
orte_app_context_t *app) orte_app_context_t *app)
{ {
char *my_dir, *path=NULL; char *session_dir, *path=NULL;
orte_proc_t *proc; orte_proc_t *proc;
char *prefix;
int i, j, rc; int i, j, rc;
orte_filem_raw_incoming_t *inbnd; orte_filem_raw_incoming_t *inbnd;
opal_list_item_t *item; opal_list_item_t *item;
char **files=NULL, *bname, *filestring; char **files=NULL, *bname, *filestring;
/* check my session directory for files I have received and /* check my jobfam session directory for files I have received and
* symlink them to the proc-level session directory of each * symlink them to the proc-level session directory of each
* local process in the job * local process in the job
*
* TODO: @rhc - please check that I've correctly interpreted your
* intention here
*/ */
my_dir = opal_dirname(orte_process_info.job_session_dir); session_dir = filem_session_dir();
if( NULL == session_dir){
/* setup */ /* we were unable to find any suitable directory */
if (NULL != orte_process_info.tmpdir_base) { rc = ORTE_ERR_BAD_PARAM;
prefix = strdup(orte_process_info.tmpdir_base); ORTE_ERROR_LOG(rc);
} else { return rc;
prefix = NULL;
} }
/* get the list of files this app wants */ /* get the list of files this app wants */
@ -692,10 +704,6 @@ static int raw_link_local_files(orte_job_t *jdata,
/* if there are no files to link, then ignore this */ /* if there are no files to link, then ignore this */
if (NULL == files) { if (NULL == files) {
free(my_dir);
if (NULL != prefix) {
free(prefix);
}
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -736,10 +744,8 @@ static int raw_link_local_files(orte_job_t *jdata,
ORTE_NAME_PRINT(&proc->name))); ORTE_NAME_PRINT(&proc->name)));
/* get the session dir name in absolute form */ /* get the session dir name in absolute form */
path = NULL; path = orte_process_info.proc_session_dir;
rc = orte_session_dir_get_name(&path, &prefix, NULL,
orte_process_info.nodename,
&proc->name);
/* create it, if it doesn't already exist */ /* create it, if it doesn't already exist */
if (OPAL_SUCCESS != (rc = opal_os_dirpath_create(path, S_IRWXU))) { if (OPAL_SUCCESS != (rc = opal_os_dirpath_create(path, S_IRWXU))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -747,11 +753,6 @@ static int raw_link_local_files(orte_job_t *jdata,
* create it - either way, we are done * create it - either way, we are done
*/ */
free(files); free(files);
if (NULL != prefix) {
free(prefix);
}
free(path);
free(my_dir);
return rc; return rc;
} }
@ -775,13 +776,8 @@ static int raw_link_local_files(orte_job_t *jdata,
inbnd->file)); inbnd->file));
/* cycle thru the link points and create symlinks to them */ /* cycle thru the link points and create symlinks to them */
for (j=0; NULL != inbnd->link_pts[j]; j++) { for (j=0; NULL != inbnd->link_pts[j]; j++) {
if (ORTE_SUCCESS != (rc = create_link(my_dir, path, inbnd->link_pts[j]))) { if (ORTE_SUCCESS != (rc = create_link(session_dir, path, inbnd->link_pts[j]))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
free(my_dir);
free(path);
if (NULL != prefix) {
free(prefix);
}
free(files); free(files);
return rc; return rc;
} }
@ -796,13 +792,8 @@ static int raw_link_local_files(orte_job_t *jdata,
} }
} }
} }
free(path);
} }
opal_argv_free(files); opal_argv_free(files);
if (NULL != prefix) {
free(prefix);
}
free(my_dir);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -999,7 +990,7 @@ static void recv_files(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag, opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata) void* cbdata)
{ {
char *file, *jobfam_dir; char *file, *session_dir;
int32_t nchunk, n, nbytes; int32_t nchunk, n, nbytes;
unsigned char data[ORTE_FILEM_RAW_CHUNK_MAX]; unsigned char data[ORTE_FILEM_RAW_CHUNK_MAX];
int rc; int rc;
@ -1086,9 +1077,9 @@ static void recv_files(int status, orte_process_name_t* sender,
incoming->top = strdup(tmp); incoming->top = strdup(tmp);
free(tmp); free(tmp);
/* define the full path to where we will put it */ /* define the full path to where we will put it */
jobfam_dir = opal_dirname(orte_process_info.job_session_dir); session_dir = filem_session_dir();
incoming->fullpath = opal_os_path(false, jobfam_dir, file, NULL);
free(jobfam_dir); incoming->fullpath = opal_os_path(false, session_dir, file, NULL);
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((1, orte_filem_base_framework.framework_output,
"%s filem:raw: opening target file %s", "%s filem:raw: opening target file %s",

Просмотреть файл

@ -164,8 +164,7 @@ static int component_available(void)
/* if session directories were forbidden, then we cannot be used */ /* if session directories were forbidden, then we cannot be used */
if (!orte_create_session_dirs || if (!orte_create_session_dirs ||
NULL == orte_process_info.tmpdir_base || NULL == orte_process_info.jobfam_session_dir ) {
NULL == orte_process_info.top_session_dir) {
return ORTE_ERR_NOT_SUPPORTED; return ORTE_ERR_NOT_SUPPORTED;
} }
@ -216,9 +215,7 @@ static int component_startup(void)
/* setup the path to the daemon rendezvous point */ /* setup the path to the daemon rendezvous point */
memset(&mca_oob_usock_component.address, 0, sizeof(struct sockaddr_un)); memset(&mca_oob_usock_component.address, 0, sizeof(struct sockaddr_un));
mca_oob_usock_component.address.sun_family = AF_UNIX; mca_oob_usock_component.address.sun_family = AF_UNIX;
session = opal_os_path(false, orte_process_info.tmpdir_base, session = opal_os_path(false, orte_process_info.jobfam_session_dir,
orte_process_info.top_session_dir,
orte_process_info.jobfam_session_dir,
"usock", NULL); "usock", NULL);
if ((strlen(session) + 1) > sizeof(mca_oob_usock_component.address.sun_path)-1) { if ((strlen(session) + 1) > sizeof(mca_oob_usock_component.address.sun_path)-1) {
opal_output(0, "SESSION DIR TOO LONG"); opal_output(0, "SESSION DIR TOO LONG");

Просмотреть файл

@ -943,6 +943,7 @@ static int setup_fork(orte_job_t *jdata,
/* forcibly set the local tmpdir base and top session dir to match ours */ /* forcibly set the local tmpdir base and top session dir to match ours */
opal_setenv("OMPI_MCA_orte_tmpdir_base", orte_process_info.tmpdir_base, true, &app->env); opal_setenv("OMPI_MCA_orte_tmpdir_base", orte_process_info.tmpdir_base, true, &app->env);
/* TODO: should we use PMIx key to pass this data? */
opal_setenv("OMPI_MCA_orte_top_session_dir", orte_process_info.top_session_dir, true, &app->env); opal_setenv("OMPI_MCA_orte_top_session_dir", orte_process_info.top_session_dir, true, &app->env);
opal_setenv("OMPI_MCA_orte_jobfam_session_dir", orte_process_info.jobfam_session_dir, true, &app->env); opal_setenv("OMPI_MCA_orte_jobfam_session_dir", orte_process_info.jobfam_session_dir, true, &app->env);
@ -1102,24 +1103,8 @@ static int setup_child(orte_job_t *jdata,
ORTE_FLAG_SET(child, ORTE_PROC_FLAG_IOF_COMPLETE); ORTE_FLAG_SET(child, ORTE_PROC_FLAG_IOF_COMPLETE);
} }
/* construct the proc's session dir name */
if (NULL != orte_process_info.tmpdir_base) {
value = strdup(orte_process_info.tmpdir_base);
} else {
value = NULL;
}
param = NULL;
if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(&param, &value, NULL,
orte_process_info.nodename,
&child->name))) {
ORTE_ERROR_LOG(rc);
if (NULL != value) {
free(value);
}
return rc;
}
free(value);
/* pass an envar so the proc can find any files it had prepositioned */ /* pass an envar so the proc can find any files it had prepositioned */
param = orte_process_info.proc_session_dir;
opal_setenv("OMPI_FILE_LOCATION", param, true, &app->env); opal_setenv("OMPI_FILE_LOCATION", param, true, &app->env);
/* if the user wanted the cwd to be the proc's session dir, then /* if the user wanted the cwd to be the proc's session dir, then
@ -1132,12 +1117,10 @@ static int setup_child(orte_job_t *jdata,
/* doesn't exist with correct permissions, and/or we can't /* doesn't exist with correct permissions, and/or we can't
* create it - either way, we are done * create it - either way, we are done
*/ */
free(param);
return rc; return rc;
} }
/* change to it */ /* change to it */
if (0 != chdir(param)) { if (0 != chdir(param)) {
free(param);
return ORTE_ERROR; return ORTE_ERROR;
} }
/* It seems that chdir doesn't /* It seems that chdir doesn't
@ -1154,6 +1137,5 @@ static int setup_child(orte_job_t *jdata,
/* update the initial wdir value too */ /* update the initial wdir value too */
opal_setenv("OMPI_MCA_initial_wdir", param, true, &app->env); opal_setenv("OMPI_MCA_initial_wdir", param, true, &app->env);
} }
free(param);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -2132,7 +2132,8 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata)
/* create the attachment FIFO and setup readevent - cannot be /* create the attachment FIFO and setup readevent - cannot be
* done if no session dirs exist! * done if no session dirs exist!
*/ */
attach_fifo = opal_os_path(false, orte_process_info.job_session_dir, "debugger_attach_fifo", NULL); attach_fifo = opal_os_path(false, orte_process_info.job_session_dir,
"debugger_attach_fifo", NULL);
if ((mkfifo(attach_fifo, FILE_MODE) < 0) && errno != EEXIST) { if ((mkfifo(attach_fifo, FILE_MODE) < 0) && errno != EEXIST) {
opal_output(0, "CANNOT CREATE FIFO %s: errno %d", attach_fifo, errno); opal_output(0, "CANNOT CREATE FIFO %s: errno %d", attach_fifo, errno);
free(attach_fifo); free(attach_fifo);

Просмотреть файл

@ -262,9 +262,7 @@ int pmix_server_init(void)
kv = OBJ_NEW(opal_value_t); kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_SERVER_TMPDIR); kv->key = strdup(OPAL_PMIX_SERVER_TMPDIR);
kv->type = OPAL_STRING; kv->type = OPAL_STRING;
kv->data.string = opal_os_path(false, orte_process_info.tmpdir_base, kv->data.string = opal_os_path(false, orte_process_info.jobfam_session_dir, NULL);
orte_process_info.top_session_dir,
orte_process_info.jobfam_session_dir, NULL);
opal_list_append(&info, &kv->super); opal_list_append(&info, &kv->super);
/* use the same for the system temp directory - this is /* use the same for the system temp directory - this is
* where the system-level tool connections will go */ * where the system-level tool connections will go */

Просмотреть файл

@ -310,7 +310,7 @@ static int orte_cr_coord_post_restart(void) {
* Add the previous session directory for cleanup * Add the previous session directory for cleanup
*/ */
opal_crs_base_cleanup_append(orte_process_info.job_session_dir, true); opal_crs_base_cleanup_append(orte_process_info.job_session_dir, true);
tmp_dir = opal_dirname(orte_process_info.job_session_dir); tmp_dir = orte_process_info.jobfam_session_dir;
if( NULL != tmp_dir ) { if( NULL != tmp_dir ) {
opal_crs_base_cleanup_append(tmp_dir, true); opal_crs_base_cleanup_append(tmp_dir, true);
free(tmp_dir); free(tmp_dir);

Просмотреть файл

@ -160,7 +160,7 @@ int orte_register_params(void)
&orte_top_session_dir); &orte_top_session_dir);
if (NULL != orte_top_session_dir) { if (NULL != orte_top_session_dir) {
if (NULL != orte_process_info.top_session_dir) { if (NULL != orte_process_info.top_session_dir) {
free(orte_process_info.top_session_dir); free(orte_process_info.top_session_dir);
} }
orte_process_info.top_session_dir = strdup(orte_top_session_dir); orte_process_info.top_session_dir = strdup(orte_top_session_dir);

Просмотреть файл

@ -182,7 +182,7 @@ int orte_list_local_hnps(opal_list_t *hnps, bool connect)
/* /*
* Check to make sure we have access to the top-level directory * Check to make sure we have access to the top-level directory
*/ */
headdir = opal_os_path(false, orte_process_info.tmpdir_base, orte_process_info.top_session_dir, NULL); headdir = orte_process_info.top_session_dir;
if( ORTE_SUCCESS != (ret = opal_os_dirpath_access(headdir, 0) )) { if( ORTE_SUCCESS != (ret = opal_os_dirpath_access(headdir, 0) )) {
/* it is okay not to find this as there may not be any /* it is okay not to find this as there may not be any
@ -231,7 +231,6 @@ int orte_list_local_hnps(opal_list_t *hnps, bool connect)
cleanup: cleanup:
if( NULL != cur_dirp ) if( NULL != cur_dirp )
closedir(cur_dirp); closedir(cur_dirp);
free(headdir);
return (opal_list_is_empty(hnps) ? ORTE_ERR_NOT_FOUND : ORTE_SUCCESS); return (opal_list_is_empty(hnps) ? ORTE_ERR_NOT_FOUND : ORTE_SUCCESS);
} }

Просмотреть файл

@ -108,6 +108,14 @@ int orte_proc_info(void)
if (init) { if (init) {
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
{
int delay = 0;
while( delay ){
sleep(1);
}
}
init = true; init = true;
OBJ_CONSTRUCT(&orte_process_info.super, opal_proc_t); OBJ_CONSTRUCT(&orte_process_info.super, opal_proc_t);

Просмотреть файл

@ -73,10 +73,6 @@ static int orte_create_dir(char *directory);
static bool orte_dir_check_file(const char *root, const char *path); static bool orte_dir_check_file(const char *root, const char *path);
static char *orte_build_job_session_dir(char *top_dir,
orte_process_name_t *proc,
orte_jobid_t jobid);
#define OMPI_PRINTF_FIX_STRING(a) ((NULL == a) ? "(null)" : a) #define OMPI_PRINTF_FIX_STRING(a) ((NULL == a) ? "(null)" : a)
/**************************** /****************************
@ -112,175 +108,186 @@ static int orte_create_dir(char *directory)
return ret; return ret;
} }
/*
* Construct the fullpath to the session directory - it static int _setup_tmpdir_base()
* will consist of "ompi.<hostname>.<effective-uid>", and {
* have subdirs: int rc = ORTE_SUCCESS;
*
* pid - the pid of the mpirun that oversees this job. Note /* make sure that we have tmpdir_base set
* that direct-launched processes will have manufactured * if we need it
* this value */
* if (NULL == orte_process_info.tmpdir_base) {
* jobid - jobid of the application being executed orte_process_info.tmpdir_base =
* strdup(opal_tmp_directory());
* vpid - vpid of the process if (NULL == orte_process_info.tmpdir_base) {
*/ rc = ORTE_ERR_OUT_OF_RESOURCE;
int goto exit;
orte_session_dir_get_name(char **fulldirpath, }
char **return_prefix, /* This will come back as the valid tmp dir */ }
char **return_frontend, exit:
char *hostid, if( ORTE_SUCCESS != rc ){
orte_process_name_t *proc) { ORTE_ERROR_LOG(rc);
char *hostname = NULL, }
*sessions = NULL, return rc;
*prefix = NULL, }
*frontend = NULL,
*jobfam = NULL, static int _setup_top_session_dir()
*job = NULL, {
*vpidstr = NULL; int rc = ORTE_SUCCESS;
bool prefix_provided = false; /* get the effective uid */
int exit_status = ORTE_SUCCESS; uid_t uid = geteuid();
size_t len;
uid_t uid; /* construct the top_session_dir if we need */
if (NULL == orte_process_info.top_session_dir) {
if (ORTE_SUCCESS != (rc = _setup_tmpdir_base())) {
return rc;
}
if( NULL == orte_process_info.nodename ||
NULL == orte_process_info.tmpdir_base ){
/* we can't setup top session dir */
rc = ORTE_ERR_BAD_PARAM;
goto exit;
}
if (0 > asprintf(&orte_process_info.top_session_dir,
"%s/ompi.%s.%lu", orte_process_info.tmpdir_base,
orte_process_info.nodename, (unsigned long)uid)) {
orte_process_info.top_session_dir = NULL;
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto exit;
}
}
exit:
if( ORTE_SUCCESS != rc ){
ORTE_ERROR_LOG(rc);
}
return rc;
}
/*
 * Make sure orte_process_info.jobfam_session_dir is populated.
 *
 * Nothing is done when the field is already set. Otherwise the top session
 * directory is set up first via _setup_top_session_dir() and the job-family
 * directory name is built beneath it:
 *   - "<top>/pid.<pid>"   when ORTE_PROC_IS_HNP
 *   - "<top>/jobfam"      when proc is NULL or proc->jobid is ORTE_JOBID_INVALID
 *   - "<top>/jf.<family>" otherwise, using ORTE_JOB_FAMILY(proc->jobid)
 *
 * @param proc  process name used to derive the job family; may be NULL
 * @return ORTE_SUCCESS, the error returned by _setup_top_session_dir(), or
 *         ORTE_ERR_OUT_OF_RESOURCE when asprintf() fails (logged via
 *         ORTE_ERROR_LOG)
 */
static int _setup_jobfam_session_dir(orte_process_name_t *proc)
{
    int rc = ORTE_SUCCESS;

    /* construct the jobfam_session_dir if we need */
    if (NULL == orte_process_info.jobfam_session_dir) {
        if (ORTE_SUCCESS != (rc = _setup_top_session_dir())) {
            return rc;
        }

        if (ORTE_PROC_IS_HNP) {
            if (0 > asprintf(&orte_process_info.jobfam_session_dir,
                             "%s/pid.%lu", orte_process_info.top_session_dir,
                             (unsigned long)orte_process_info.pid)) {
                /* asprintf leaves the target pointer undefined on failure;
                 * reset it so later NULL checks stay meaningful */
                orte_process_info.jobfam_session_dir = NULL;
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto exit;
            }
        } else {
            /* we were not given one, so define it */
            if (NULL == proc || (ORTE_JOBID_INVALID == proc->jobid)) {
                if (0 > asprintf(&orte_process_info.jobfam_session_dir,
                                 "%s/jobfam", orte_process_info.top_session_dir)) {
                    orte_process_info.jobfam_session_dir = NULL;
                    rc = ORTE_ERR_OUT_OF_RESOURCE;
                    goto exit;
                }
            } else {
                if (0 > asprintf(&orte_process_info.jobfam_session_dir,
                                 "%s/jf.%d", orte_process_info.top_session_dir,
                                 ORTE_JOB_FAMILY(proc->jobid))) {
                    orte_process_info.jobfam_session_dir = NULL;
                    rc = ORTE_ERR_OUT_OF_RESOURCE;
                    goto exit;
                }
            }
        }
    }

exit:
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }
    return rc;
}
/*
 * Make sure orte_process_info.job_session_dir is populated.
 *
 * Nothing is done when the field is already set. Otherwise the job-family
 * session directory is set up first via _setup_jobfam_session_dir() and, when
 * proc carries a valid jobid, the name "<jobfam>/<local jobid>" is built
 * beneath it. When proc is NULL or proc->jobid is ORTE_JOBID_INVALID the
 * field is left NULL and ORTE_SUCCESS is returned.
 *
 * @param proc  process name supplying the jobid; may be NULL
 * @return ORTE_SUCCESS, the error returned by _setup_jobfam_session_dir(), or
 *         ORTE_ERR_OUT_OF_RESOURCE when asprintf() fails (logged via
 *         ORTE_ERROR_LOG)
 */
static int
_setup_job_session_dir(orte_process_name_t *proc)
{
    int rc = ORTE_SUCCESS;

    /* construct the job_session_dir if we need */
    if (NULL == orte_process_info.job_session_dir) {
        if (ORTE_SUCCESS != (rc = _setup_jobfam_session_dir(proc))) {
            return rc;
        }

        /* a NULL proc has no jobid to read: treat it like an invalid jobid
         * instead of dereferencing it */
        if (NULL != proc && ORTE_JOBID_INVALID != proc->jobid) {
            if (0 > asprintf(&orte_process_info.job_session_dir,
                             "%s/%d", orte_process_info.jobfam_session_dir,
                             ORTE_LOCAL_JOBID(proc->jobid))) {
                /* asprintf leaves the target pointer undefined on failure */
                orte_process_info.job_session_dir = NULL;
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto exit;
            }
        } else {
            orte_process_info.job_session_dir = NULL;
        }
    }

exit:
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }
    return rc;
}
/*
 * Make sure orte_process_info.proc_session_dir is populated.
 *
 * Nothing is done when the field is already set. Otherwise the job session
 * directory is set up first via _setup_job_session_dir() and, when proc
 * carries a valid vpid, the name "<job session dir>/<vpid>" is built beneath
 * it. When proc is NULL or proc->vpid is ORTE_VPID_INVALID the field is left
 * NULL and ORTE_SUCCESS is returned.
 *
 * @param proc  process name supplying the vpid; may be NULL
 * @return ORTE_SUCCESS, the error returned by _setup_job_session_dir(), or
 *         ORTE_ERR_OUT_OF_RESOURCE when asprintf() fails (logged via
 *         ORTE_ERROR_LOG)
 */
static int
_setup_proc_session_dir(orte_process_name_t *proc)
{
    int rc = ORTE_SUCCESS;

    /* construct the proc_session_dir if we need */
    if (NULL == orte_process_info.proc_session_dir) {
        if (ORTE_SUCCESS != (rc = _setup_job_session_dir(proc))) {
            return rc;
        }

        /* a NULL proc has no vpid to read: treat it like an invalid vpid
         * instead of dereferencing it */
        if (NULL != proc && ORTE_VPID_INVALID != proc->vpid) {
            /* NOTE(review): "%d" assumes the vpid type is int-compatible - confirm */
            if (0 > asprintf(&orte_process_info.proc_session_dir,
                             "%s/%d", orte_process_info.job_session_dir,
                             proc->vpid)) {
                /* asprintf leaves the target pointer undefined on failure */
                orte_process_info.proc_session_dir = NULL;
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto exit;
            }
        } else {
            orte_process_info.proc_session_dir = NULL;
        }
    }

exit:
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }
    return rc;
}
int orte_session_setup_base(orte_process_name_t *proc)
{
int rc;
/* Ensure that system info is set */ /* Ensure that system info is set */
orte_proc_info(); orte_proc_info();
/* get the effective uid */ /* setup job and proc session directories */
uid = geteuid(); if( ORTE_SUCCESS != (rc = _setup_job_session_dir(proc)) ){
return rc;
/*
* set the 'hostname'
*/
if( NULL != hostid) { /* User specified version */
hostname = strdup(hostid);
}
else { /* check if it is set elsewhere */
if( NULL != orte_process_info.nodename)
hostname = strdup(orte_process_info.nodename);
else {
/* Couldn't find it, so fail */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
exit_status = ORTE_ERR_BAD_PARAM;
goto cleanup;
}
} }
/* construct the frontend of the session directory*/ if( ORTE_SUCCESS != (rc = _setup_proc_session_dir(proc)) ){
if (NULL != orte_process_info.top_session_dir) { return rc;
frontend = strdup(orte_process_info.top_session_dir);
} else { /* If not set then construct it */
if (0 > asprintf(&frontend, "ompi.%s.%lu", hostname, (unsigned long)uid)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
}
/* construct the next level down, which belongs to the
* job family. This is related to the mpirun that launched
* the job, or is an arbitrary (agreed upon) value if
* direct launched */
if (ORTE_PROC_IS_HNP) {
if (0 > asprintf(&jobfam, "pid.%lu", (unsigned long)orte_process_info.pid)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
orte_process_info.jobfam_session_dir = strdup(jobfam);
} else if (NULL != orte_process_info.jobfam_session_dir) {
/* we had a job family session dir passed down to us by mpirun */
jobfam = strdup(orte_process_info.jobfam_session_dir);
} else {
/* we were not given one, so define it */
if (NULL == proc) {
jobfam = strdup("jobfam");
} else {
if (0 > asprintf(&jobfam, "jf.%d", ORTE_JOB_FAMILY(proc->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
}
orte_process_info.jobfam_session_dir = strdup(jobfam);
}
/*
* Construct the session directory
*/
/* If we were given a valid vpid then we can construct it fully */
if( NULL != proc) {
if (ORTE_VPID_INVALID != proc->vpid) {
if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
if (ORTE_SUCCESS != orte_util_convert_vpid_to_string(&vpidstr, proc->vpid)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
sessions = opal_os_path(false, frontend, jobfam, job, vpidstr, NULL);
if( NULL == sessions ) {
ORTE_ERROR_LOG(ORTE_ERROR);
exit_status = ORTE_ERROR;
goto cleanup;
}
} else if (ORTE_JOBID_INVALID != proc->jobid) {
if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
sessions = opal_os_path( false, frontend, jobfam, job, NULL );
if( NULL == sessions ) {
ORTE_ERROR_LOG(ORTE_ERROR);
exit_status = ORTE_ERROR;
goto cleanup;
}
} else {
sessions = strdup(frontend); /* must dup this to avoid double-free later */
}
} else {
/* If we were not given a proc at all, then we just set it to frontend */
sessions = strdup(frontend); /* must dup this to avoid double-free later */
}
/*
* If the user specified an invalid prefix, or no prefix at all
* we need to keep looking
*/
if( NULL != fulldirpath && NULL != *fulldirpath) {
free(*fulldirpath);
*fulldirpath = NULL;
}
if( NULL != return_prefix && NULL != *return_prefix) { /* use the user specified one, if available */
prefix = strdup(*return_prefix);
prefix_provided = true;
}
/* Try to find a proper alternative prefix */
else if (NULL != orte_process_info.tmpdir_base) { /* stored value */
prefix = strdup(orte_process_info.tmpdir_base);
}
else { /* General Environment var */
prefix = strdup(opal_tmp_directory());
}
len = strlen(prefix);
/* check for a trailing path separator */
if (OPAL_PATH_SEP[0] == prefix[len-1]) {
prefix[len-1] = '\0';
} }
/* BEFORE doing anything else, check to see if this prefix is /* BEFORE doing anything else, check to see if this prefix is
* allowed by the system * allowed by the system
*/ */
if (NULL != orte_prohibited_session_dirs) { if (NULL != orte_prohibited_session_dirs ||
NULL != orte_process_info.tmpdir_base ) {
char **list; char **list;
int i, len; int i, len;
/* break the string into tokens - it should be /* break the string into tokens - it should be
@ -291,97 +298,36 @@ orte_session_dir_get_name(char **fulldirpath,
/* cycle through the list */ /* cycle through the list */
for (i=0; i < len; i++) { for (i=0; i < len; i++) {
/* check if prefix matches */ /* check if prefix matches */
if (0 == strncmp(prefix, list[i], strlen(list[i]))) { if (0 == strncmp(orte_process_info.tmpdir_base, list[i], strlen(list[i]))) {
/* this is a prohibited location */ /* this is a prohibited location */
orte_show_help("help-orte-runtime.txt", orte_show_help("help-orte-runtime.txt",
"orte:session:dir:prohibited", "orte:session:dir:prohibited",
true, prefix, orte_prohibited_session_dirs); true, orte_process_info.tmpdir_base,
orte_prohibited_session_dirs);
opal_argv_free(list); opal_argv_free(list);
free(prefix);
free(sessions);
free(hostname);
free(frontend);
return ORTE_ERR_FATAL; return ORTE_ERR_FATAL;
} }
} }
opal_argv_free(list); /* done with this */ opal_argv_free(list); /* done with this */
} }
/* return ORTE_SUCCESS;
* Construct the absolute final path, if requested
*/
if (NULL != fulldirpath) {
*fulldirpath = opal_os_path(false, prefix, sessions, NULL);
}
/*
* Return the frontend and prefix, if user requested we do so
*/
if (NULL != return_frontend) {
*return_frontend = strdup(frontend);
}
if (!prefix_provided && NULL != return_prefix) {
*return_prefix = strdup(prefix);
}
cleanup:
if(NULL != hostname) {
free(hostname);
}
if(NULL != sessions) {
free(sessions);
}
if (NULL != prefix) {
free(prefix);
}
if (NULL != frontend) {
free(frontend);
}
if (NULL != jobfam) {
free(jobfam);
}
if (NULL != job) {
free(job);
}
if (NULL != vpidstr) {
free(vpidstr);
}
return exit_status;
} }
/* /*
* Construct the session directory and create it if necessary * Construct the session directory and create it if necessary
*/ */
int orte_session_dir(bool create, int orte_session_dir(bool create, orte_process_name_t *proc)
char *prefix, char *hostid,
orte_process_name_t *proc)
{ {
char *fulldirpath = NULL,
*frontend = NULL,
*sav = NULL;
int rc = ORTE_SUCCESS; int rc = ORTE_SUCCESS;
char *local_prefix = NULL;
/* use the specified prefix, if one was given */
if (NULL != prefix) {
local_prefix = strdup(prefix);
}
/* /*
* Get the session directory full name * Get the session directory full name
*/ */
if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(&fulldirpath, if (ORTE_SUCCESS != (rc = orte_session_setup_base(proc))) {
&local_prefix,
&frontend,
hostid,
proc))) {
if (ORTE_ERR_FATAL == rc) { if (ORTE_ERR_FATAL == rc) {
/* this indicates we should abort quietly */ /* this indicates we should abort quietly */
rc = ORTE_ERR_SILENT; rc = ORTE_ERR_SILENT;
goto cleanup;
} }
/* otherwise, bark a little first */
ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
} }
@ -389,73 +335,26 @@ int orte_session_dir(bool create,
* Now that we have the full path, go ahead and create it if necessary * Now that we have the full path, go ahead and create it if necessary
*/ */
if( create ) { if( create ) {
if( ORTE_SUCCESS != (rc = orte_create_dir(fulldirpath) ) ) { if( ORTE_SUCCESS != (rc = orte_create_dir(orte_process_info.proc_session_dir)) ) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
} }
} }
/* update global structure fields */
if (NULL != orte_process_info.tmpdir_base) {
free(orte_process_info.tmpdir_base);
}
orte_process_info.tmpdir_base = strdup(local_prefix);
if (NULL != orte_process_info.top_session_dir) {
free(orte_process_info.top_session_dir);
orte_process_info.top_session_dir = NULL;
}
if (NULL != frontend) {
orte_process_info.top_session_dir = strdup(frontend);
}
/*
* Set the process session directory
*/
if (ORTE_VPID_INVALID != proc->vpid) {
if (NULL != orte_process_info.proc_session_dir) {
free(orte_process_info.proc_session_dir);
}
orte_process_info.proc_session_dir = strdup(fulldirpath);
/* Strip off last part of directory structure */
sav = opal_dirname(fulldirpath);
free(fulldirpath);
fulldirpath = sav;
sav = NULL;
}
/*
* Set the job session directory
*/
if (ORTE_JOBID_INVALID != proc->jobid) {
if (NULL != orte_process_info.job_session_dir) {
free(orte_process_info.job_session_dir);
}
orte_process_info.job_session_dir = strdup(fulldirpath);
}
if (orte_debug_flag) { if (orte_debug_flag) {
opal_output(0, "procdir: %s", opal_output(0, "procdir: %s",
OMPI_PRINTF_FIX_STRING(orte_process_info.proc_session_dir)); OMPI_PRINTF_FIX_STRING(orte_process_info.proc_session_dir));
opal_output(0, "jobdir: %s", opal_output(0, "jobdir: %s",
OMPI_PRINTF_FIX_STRING(orte_process_info.job_session_dir)); OMPI_PRINTF_FIX_STRING(orte_process_info.job_session_dir));
opal_output(0, "top: %s", opal_output(0, "top: %s",
OMPI_PRINTF_FIX_STRING(orte_process_info.jobfam_session_dir));
opal_output(0, "top: %s",
OMPI_PRINTF_FIX_STRING(orte_process_info.top_session_dir)); OMPI_PRINTF_FIX_STRING(orte_process_info.top_session_dir));
opal_output(0, "tmp: %s", opal_output(0, "tmp: %s",
OMPI_PRINTF_FIX_STRING(orte_process_info.tmpdir_base)); OMPI_PRINTF_FIX_STRING(orte_process_info.tmpdir_base));
} }
cleanup: cleanup:
if (NULL != local_prefix) {
free(local_prefix);
}
if(NULL != fulldirpath) {
free(fulldirpath);
}
if(NULL != frontend) {
free(frontend);
}
return rc; return rc;
} }
@ -466,16 +365,14 @@ int
orte_session_dir_cleanup(orte_jobid_t jobid) orte_session_dir_cleanup(orte_jobid_t jobid)
{ {
int rc = ORTE_SUCCESS; int rc = ORTE_SUCCESS;
char *tmp = NULL;
char *job_session_dir=NULL;
if (!orte_create_session_dirs) { if (!orte_create_session_dirs ) {
/* didn't create them */ /* we haven't created them */
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
if (NULL == orte_process_info.tmpdir_base && if (NULL == orte_process_info.job_session_dir ||
NULL == orte_process_info.top_session_dir) { NULL == orte_process_info.proc_session_dir) {
/* this should never happen - it means we are calling /* this should never happen - it means we are calling
* cleanup *before* properly setting up the session * cleanup *before* properly setting up the session
* dir system. This leaves open the possibility of * dir system. This leaves open the possibility of
@ -486,37 +383,30 @@ orte_session_dir_cleanup(orte_jobid_t jobid)
goto CLEANUP; goto CLEANUP;
} }
/* need to setup the top_session_dir with the prefix */
tmp = opal_os_path(false,
orte_process_info.tmpdir_base,
orte_process_info.top_session_dir, NULL);
/* we can only blow away session directories for our job family */
job_session_dir = orte_build_job_session_dir(tmp, ORTE_PROC_MY_NAME, jobid);
if (NULL == job_session_dir) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/* recursively blow the whole session away for our job family, /* recursively blow the whole session away for our job family,
* saving only output files * saving only output files
*/ */
opal_os_dirpath_destroy(job_session_dir, true, orte_dir_check_file); opal_os_dirpath_destroy(orte_process_info.job_session_dir,
true, orte_dir_check_file);
/* now attempt to eliminate the top level directory itself - this /* now attempt to eliminate the top level directory itself - this
* will fail if anything is present, but ensures we cleanup if * will fail if anything is present, but ensures we cleanup if
* we are the last one out * we are the last one out
*/ */
opal_os_dirpath_destroy(tmp, false, orte_dir_check_file); if( NULL != orte_process_info.top_session_dir ){
opal_os_dirpath_destroy(orte_process_info.top_session_dir,
false, orte_dir_check_file);
}
if (NULL != job_session_dir && opal_os_dirpath_is_empty(job_session_dir)) { if (opal_os_dirpath_is_empty(orte_process_info.job_session_dir)) {
if (orte_debug_flag) { if (orte_debug_flag) {
opal_output(0, "sess_dir_cleanup: found job session dir empty - deleting"); opal_output(0, "sess_dir_cleanup: found job session dir empty - deleting");
} }
rmdir(job_session_dir); rmdir(orte_process_info.job_session_dir);
} else { } else {
if (orte_debug_flag) { if (orte_debug_flag) {
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(job_session_dir, 0)) { if (OPAL_ERR_NOT_FOUND ==
opal_os_dirpath_access(orte_process_info.job_session_dir, 0)) {
opal_output(0, "sess_dir_cleanup: job session dir does not exist"); opal_output(0, "sess_dir_cleanup: job session dir does not exist");
} else { } else {
opal_output(0, "sess_dir_cleanup: job session dir not empty - leaving"); opal_output(0, "sess_dir_cleanup: job session dir not empty - leaving");
@ -525,24 +415,27 @@ orte_session_dir_cleanup(orte_jobid_t jobid)
goto CLEANUP; goto CLEANUP;
} }
if (opal_os_dirpath_is_empty(tmp)) { if ( NULL != orte_process_info.top_session_dir ){
if (orte_debug_flag) {
opal_output(0, "sess_dir_cleanup: found top session dir empty - deleting"); if( opal_os_dirpath_is_empty(orte_process_info.top_session_dir) ) {
} if (orte_debug_flag) {
rmdir(tmp); opal_output(0, "sess_dir_cleanup: found top session dir empty - deleting");
} else {
if (orte_debug_flag) {
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(tmp, 0)) {
opal_output(0, "sess_dir_cleanup: top session dir does not exist");
} else {
opal_output(0, "sess_dir_cleanup: top session dir not empty - leaving");
} }
} rmdir(orte_process_info.top_session_dir);
} else {
if (orte_debug_flag) {
if (OPAL_ERR_NOT_FOUND ==
opal_os_dirpath_access(orte_process_info.top_session_dir, 0)) {
opal_output(0, "sess_dir_cleanup: top session dir does not exist");
} else {
opal_output(0, "sess_dir_cleanup: top session dir not empty - leaving");
}
}
}
} }
CLEANUP: CLEANUP:
if (NULL != tmp) free(tmp);
if (NULL != job_session_dir) free(job_session_dir);
return rc; return rc;
} }
@ -554,63 +447,41 @@ orte_session_dir_finalize(orte_process_name_t *proc)
char *tmp; char *tmp;
char *job_session_dir, *vpid, *proc_session_dir; char *job_session_dir, *vpid, *proc_session_dir;
if (!orte_create_session_dirs) { if (!orte_create_session_dirs ) {
/* didn't create them */ /* we haven't created them */
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
if (NULL == orte_process_info.tmpdir_base && if (NULL == orte_process_info.job_session_dir ||
NULL == orte_process_info.top_session_dir) { NULL == orte_process_info.proc_session_dir) {
/* this should never happen - it means we are calling /* this should never happen - it means we are calling
* cleanup *before* properly setting up the session * cleanup *before* properly setting up the session
* dir system. Protect against the possibility of * dir system. This leaves open the possibility of
* accidentally removing directories we shouldn't * accidentally removing directories we shouldn't
* touch by returning * touch
*/ */
return ORTE_ERR_NOT_INITIALIZED; rc = ORTE_ERR_NOT_INITIALIZED;
goto CLEANUP;
} }
/* need to setup the top_session_dir with the prefix */ opal_os_dirpath_destroy(orte_process_info.proc_session_dir,
tmp = opal_os_path(false,
orte_process_info.tmpdir_base,
orte_process_info.top_session_dir, NULL);
/* define the proc and job session directories for this process */
if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(&vpid, proc->vpid))) {
ORTE_ERROR_LOG(rc);
free(tmp);
return rc;
}
job_session_dir = orte_build_job_session_dir(tmp, proc, proc->jobid);
if( NULL == job_session_dir) {
free(tmp);
free(vpid);
return ORTE_ERR_OUT_OF_RESOURCE;
}
proc_session_dir = opal_os_path( false, job_session_dir, vpid, NULL );
if( NULL == proc_session_dir ) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
free(tmp);
free(vpid);
free(job_session_dir);
return ORTE_ERR_OUT_OF_RESOURCE;
}
opal_os_dirpath_destroy(proc_session_dir,
false, orte_dir_check_file); false, orte_dir_check_file);
opal_os_dirpath_destroy(job_session_dir, opal_os_dirpath_destroy(orte_process_info.job_session_dir,
false, orte_dir_check_file);
opal_os_dirpath_destroy(tmp,
false, orte_dir_check_file); false, orte_dir_check_file);
if( NULL != orte_process_info.top_session_dir ){
opal_os_dirpath_destroy(orte_process_info.top_session_dir,
false, orte_dir_check_file);
}
if (opal_os_dirpath_is_empty(proc_session_dir)) { if (opal_os_dirpath_is_empty(orte_process_info.proc_session_dir)) {
if (orte_debug_flag) { if (orte_debug_flag) {
opal_output(0, "sess_dir_finalize: found proc session dir empty - deleting"); opal_output(0, "sess_dir_finalize: found proc session dir empty - deleting");
} }
rmdir(proc_session_dir); rmdir(orte_process_info.proc_session_dir);
} else { } else {
if (orte_debug_flag) { if (orte_debug_flag) {
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(proc_session_dir, 0)) { if (OPAL_ERR_NOT_FOUND ==
opal_os_dirpath_access(orte_process_info.proc_session_dir, 0)) {
opal_output(0, "sess_dir_finalize: proc session dir does not exist"); opal_output(0, "sess_dir_finalize: proc session dir does not exist");
} else { } else {
opal_output(0, "sess_dir_finalize: proc session dir not empty - leaving"); opal_output(0, "sess_dir_finalize: proc session dir not empty - leaving");
@ -619,14 +490,15 @@ orte_session_dir_finalize(orte_process_name_t *proc)
goto CLEANUP; goto CLEANUP;
} }
if (opal_os_dirpath_is_empty(job_session_dir)) { if (opal_os_dirpath_is_empty(orte_process_info.job_session_dir)) {
if (orte_debug_flag) { if (orte_debug_flag) {
opal_output(0, "sess_dir_finalize: found job session dir empty - deleting"); opal_output(0, "sess_dir_finalize: found job session dir empty - deleting");
} }
rmdir(job_session_dir); rmdir(orte_process_info.job_session_dir);
} else { } else {
if (orte_debug_flag) { if (orte_debug_flag) {
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(job_session_dir, 0)) { if (OPAL_ERR_NOT_FOUND ==
opal_os_dirpath_access(orte_process_info.job_session_dir, 0)) {
opal_output(0, "sess_dir_finalize: job session dir does not exist"); opal_output(0, "sess_dir_finalize: job session dir does not exist");
} else { } else {
opal_output(0, "sess_dir_finalize: job session dir not empty - leaving"); opal_output(0, "sess_dir_finalize: job session dir not empty - leaving");
@ -635,26 +507,25 @@ orte_session_dir_finalize(orte_process_name_t *proc)
goto CLEANUP; goto CLEANUP;
} }
if (opal_os_dirpath_is_empty(tmp)) { if(NULL != orte_process_info.top_session_dir) {
if (orte_debug_flag) { if (opal_os_dirpath_is_empty(orte_process_info.top_session_dir)) {
opal_output(0, "sess_dir_finalize: found top session dir empty - deleting"); if (orte_debug_flag) {
} opal_output(0, "sess_dir_finalize: found top session dir empty - deleting");
rmdir(tmp);
} else {
if (orte_debug_flag) {
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(tmp, 0)) {
opal_output(0, "sess_dir_finalize: top session dir does not exist");
} else {
opal_output(0, "sess_dir_finalize: top session dir not empty - leaving");
} }
} rmdir(tmp);
} else {
if (orte_debug_flag) {
if (OPAL_ERR_NOT_FOUND ==
opal_os_dirpath_access(orte_process_info.top_session_dir, 0)) {
opal_output(0, "sess_dir_finalize: top session dir does not exist");
} else {
opal_output(0, "sess_dir_finalize: top session dir not empty - leaving");
}
}
}
} }
CLEANUP: CLEANUP:
free(tmp);
free(vpid);
free(job_session_dir);
free(proc_session_dir);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -680,33 +551,3 @@ orte_dir_check_file(const char *root, const char *path)
return true; return true;
} }
/*
 * Build the path of a job-level session directory underneath top_dir.
 *
 *   jobid == ORTE_JOBID_WILDCARD:
 *       <top_dir>/<orte_process_info.jobfam_session_dir>
 *   any other jobid:
 *       <top_dir>/<orte_process_info.jobfam_session_dir>/<local jobid>
 *
 * top_dir  leading path component handed to opal_os_path()
 * proc     not consulted by this routine
 * jobid    job whose directory is wanted, or ORTE_JOBID_WILDCARD for the
 *          job-family level directory
 *
 * Returns the string produced by opal_os_path(), or NULL when either the
 * jobid could not be formatted or the path could not be assembled; in both
 * failure cases ORTE_ERR_OUT_OF_RESOURCE is logged before returning.
 */
static char *orte_build_job_session_dir(char *top_dir,
                                        orte_process_name_t *proc,
                                        orte_jobid_t jobid)
{
    char *path = NULL;
    char *jobstr = NULL;

    if (ORTE_JOBID_WILDCARD == jobid) {
        /* wildcard: stop at the job-family directory */
        path = opal_os_path(false, top_dir,
                            orte_process_info.jobfam_session_dir, NULL);
    } else {
        /* specific job: append the local part of the jobid */
        if (asprintf(&jobstr, "%d", ORTE_LOCAL_JOBID(jobid)) < 0) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return NULL;
        }
        path = opal_os_path(false, top_dir,
                            orte_process_info.jobfam_session_dir, jobstr, NULL);
        /* the formatted jobid is only needed while building the path */
        free(jobstr);
    }

    if (NULL == path) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
    }
    return path;
}

Просмотреть файл

@ -99,19 +99,6 @@ BEGIN_C_DECLS
* locate an already existing universe for reconnection * locate an already existing universe for reconnection
* purposes. If set to "true", then the function * purposes. If set to "true", then the function
* creates the directory, if possible. * creates the directory, if possible.
* @param prefix A string variable indicating where the user
* stipulated the directory should be found or
* placed. A value of "NULL" indicates that the user
* specified no location - hence, the function explores
* a range of "standard" locations.
* @param hostid Name of the host on which the session directory is
* being built. Used to build the name of the
* "openmpi-sessions-[user]@[host]:[batch]" branch of
* the directory tree. NULL indicates that the nodename
* found in orte_process_info is to be used.
* @param batchid Batch job name, used in batch scheduling
* systems. NULL indicates that the default of "0" is
* to be used.
* @param proc Pointer to a process name for which the session * @param proc Pointer to a process name for which the session
* dir name is desired * dir name is desired
* *
@ -120,18 +107,13 @@ BEGIN_C_DECLS
* @retval OMPI_ERROR The directory cannot be found (if create is * @retval OMPI_ERROR The directory cannot be found (if create is
* "false") or created (if create is "true"). * "false") or created (if create is "true").
*/ */
ORTE_DECLSPEC int orte_session_dir(bool create, char *prefix, char *hostid, ORTE_DECLSPEC int orte_session_dir(bool create, orte_process_name_t *proc);
orte_process_name_t *proc);
/* /*
* Construct the session directory name from the input parameters. * Setup session-related directory paths
* This function does no checking that the directory exists, or can be used
*/ */
ORTE_DECLSPEC int orte_session_dir_get_name(char **fulldirpath, ORTE_DECLSPEC int orte_session_setup_base(orte_process_name_t *proc);
char **prfx,
char **frontend,
char *hostid,
orte_process_name_t *proc);
/** The orte_session_dir_finalize() function performs a cleanup of the /** The orte_session_dir_finalize() function performs a cleanup of the
* session directory tree. It first removes the session directory for * session directory tree. It first removes the session directory for