Several fixes related to session directories:
* Enable OMPI to retrieve paths from the RM through PMIx. * Clean up tempdir handling.
Этот коммит содержится в:
родитель
fb51d65049
Коммит
81195ab724
@ -136,10 +136,7 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
|||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
(NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
|
(NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
|
||||||
orte_process_info.nodename));
|
orte_process_info.nodename));
|
||||||
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
|
if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
|
||||||
orte_process_info.tmpdir_base,
|
|
||||||
orte_process_info.nodename,
|
|
||||||
ORTE_PROC_MY_NAME))) {
|
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_session_dir";
|
error = "orte_session_dir";
|
||||||
goto error;
|
goto error;
|
||||||
@ -149,29 +146,6 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
|||||||
proc-specific session directory. */
|
proc-specific session directory. */
|
||||||
opal_output_set_output_file_info(orte_process_info.proc_session_dir,
|
opal_output_set_output_file_info(orte_process_info.proc_session_dir,
|
||||||
"output-", NULL, NULL);
|
"output-", NULL, NULL);
|
||||||
/* store the session directory location */
|
|
||||||
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
||||||
kv.key = strdup(OPAL_PMIX_NSDIR);
|
|
||||||
kv.type = OPAL_STRING;
|
|
||||||
kv.data.string = strdup(orte_process_info.job_session_dir);
|
|
||||||
if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &kv))) {
|
|
||||||
ORTE_ERROR_LOG(ret);
|
|
||||||
OBJ_DESTRUCT(&kv);
|
|
||||||
error = "opal pmix put job sessiondir";
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
OBJ_DESTRUCT(&kv);
|
|
||||||
OBJ_CONSTRUCT(&kv, opal_value_t);
|
|
||||||
kv.key = strdup(OPAL_PMIX_PROCDIR);
|
|
||||||
kv.type = OPAL_STRING;
|
|
||||||
kv.data.string = strdup(orte_process_info.proc_session_dir);
|
|
||||||
if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &kv))) {
|
|
||||||
ORTE_ERROR_LOG(ret);
|
|
||||||
OBJ_DESTRUCT(&kv);
|
|
||||||
error = "opal pmix put proc sessiondir";
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
OBJ_DESTRUCT(&kv);
|
|
||||||
}
|
}
|
||||||
/* Setup the communication infrastructure */
|
/* Setup the communication infrastructure */
|
||||||
/*
|
/*
|
||||||
|
@ -237,10 +237,7 @@ int orte_ess_base_orted_setup(char **hosts)
|
|||||||
/* take a pass thru the session directory code to fillin the
|
/* take a pass thru the session directory code to fillin the
|
||||||
* tmpdir names - don't create anything yet
|
* tmpdir names - don't create anything yet
|
||||||
*/
|
*/
|
||||||
if (ORTE_SUCCESS != (ret = orte_session_dir(false,
|
if (ORTE_SUCCESS != (ret = orte_session_dir(false, ORTE_PROC_MY_NAME))) {
|
||||||
orte_process_info.tmpdir_base,
|
|
||||||
orte_process_info.nodename,
|
|
||||||
ORTE_PROC_MY_NAME))) {
|
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_session_dir define";
|
error = "orte_session_dir define";
|
||||||
goto error;
|
goto error;
|
||||||
@ -250,10 +247,7 @@ int orte_ess_base_orted_setup(char **hosts)
|
|||||||
*/
|
*/
|
||||||
orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
|
orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
|
||||||
/* now actually create the directory tree */
|
/* now actually create the directory tree */
|
||||||
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
|
if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
|
||||||
orte_process_info.tmpdir_base,
|
|
||||||
orte_process_info.nodename,
|
|
||||||
ORTE_PROC_MY_NAME))) {
|
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_session_dir";
|
error = "orte_session_dir";
|
||||||
goto error;
|
goto error;
|
||||||
@ -277,11 +271,8 @@ int orte_ess_base_orted_setup(char **hosts)
|
|||||||
/* define a log file name in the session directory */
|
/* define a log file name in the session directory */
|
||||||
snprintf(log_file, PATH_MAX, "output-orted-%s-%s.log",
|
snprintf(log_file, PATH_MAX, "output-orted-%s-%s.log",
|
||||||
jobidstring, orte_process_info.nodename);
|
jobidstring, orte_process_info.nodename);
|
||||||
log_path = opal_os_path(false,
|
log_path = opal_os_path(false, orte_process_info.top_session_dir,
|
||||||
orte_process_info.tmpdir_base,
|
log_file, NULL);
|
||||||
orte_process_info.top_session_dir,
|
|
||||||
log_file,
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
fd = open(log_path, O_RDWR|O_CREAT|O_TRUNC, 0640);
|
fd = open(log_path, O_RDWR|O_CREAT|O_TRUNC, 0640);
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
|
@ -145,10 +145,9 @@ int orte_ess_base_tool_setup(void)
|
|||||||
* tmp base where any other session directories on
|
* tmp base where any other session directories on
|
||||||
* this node might be located
|
* this node might be located
|
||||||
*/
|
*/
|
||||||
if (ORTE_SUCCESS != (ret = orte_session_dir_get_name(NULL,
|
|
||||||
&orte_process_info.tmpdir_base,
|
ret = orte_session_setup_base(NULL);
|
||||||
&orte_process_info.top_session_dir,
|
if (ORTE_SUCCESS != ret ) {
|
||||||
orte_process_info.nodename, NULL))) {
|
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "define session dir names";
|
error = "define session dir names";
|
||||||
goto error;
|
goto error;
|
||||||
|
@ -138,7 +138,7 @@ static int rte_init(void)
|
|||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
char *error = NULL;
|
char *error = NULL;
|
||||||
char *contact_path, *jobfam_dir;
|
char *contact_path;
|
||||||
orte_job_t *jdata;
|
orte_job_t *jdata;
|
||||||
orte_node_t *node;
|
orte_node_t *node;
|
||||||
orte_proc_t *proc;
|
orte_proc_t *proc;
|
||||||
@ -294,10 +294,7 @@ static int rte_init(void)
|
|||||||
/* take a pass thru the session directory code to fillin the
|
/* take a pass thru the session directory code to fillin the
|
||||||
* tmpdir names - don't create anything yet
|
* tmpdir names - don't create anything yet
|
||||||
*/
|
*/
|
||||||
if (ORTE_SUCCESS != (ret = orte_session_dir(false,
|
if (ORTE_SUCCESS != (ret = orte_session_dir(false, ORTE_PROC_MY_NAME))) {
|
||||||
orte_process_info.tmpdir_base,
|
|
||||||
orte_process_info.nodename,
|
|
||||||
ORTE_PROC_MY_NAME))) {
|
|
||||||
error = "orte_session_dir define";
|
error = "orte_session_dir define";
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
@ -307,10 +304,7 @@ static int rte_init(void)
|
|||||||
orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
|
orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
|
||||||
|
|
||||||
/* now actually create the directory tree */
|
/* now actually create the directory tree */
|
||||||
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
|
if (ORTE_SUCCESS != (ret = orte_session_dir(true, ORTE_PROC_MY_NAME))) {
|
||||||
orte_process_info.tmpdir_base,
|
|
||||||
orte_process_info.nodename,
|
|
||||||
ORTE_PROC_MY_NAME))) {
|
|
||||||
error = "orte_session_dir";
|
error = "orte_session_dir";
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
@ -586,9 +580,12 @@ static int rte_init(void)
|
|||||||
opal_output_set_output_file_info(orte_process_info.proc_session_dir,
|
opal_output_set_output_file_info(orte_process_info.proc_session_dir,
|
||||||
"output-", NULL, NULL);
|
"output-", NULL, NULL);
|
||||||
/* save my contact info in a file for others to find */
|
/* save my contact info in a file for others to find */
|
||||||
jobfam_dir = opal_dirname(orte_process_info.job_session_dir);
|
if( NULL == orte_process_info.jobfam_session_dir ){
|
||||||
contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL);
|
/* has to be set here! */
|
||||||
free(jobfam_dir);
|
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir, "contact.txt", NULL);
|
||||||
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
|
OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
|
||||||
"%s writing contact file %s",
|
"%s writing contact file %s",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
@ -758,10 +755,9 @@ static int rte_init(void)
|
|||||||
true, error, ORTE_ERROR_NAME(ret), ret);
|
true, error, ORTE_ERROR_NAME(ret), ret);
|
||||||
}
|
}
|
||||||
/* remove my contact info file, if we have session directories */
|
/* remove my contact info file, if we have session directories */
|
||||||
if (NULL != orte_process_info.job_session_dir) {
|
if (NULL != orte_process_info.jobfam_session_dir) {
|
||||||
jobfam_dir = opal_dirname(orte_process_info.job_session_dir);
|
contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir,
|
||||||
contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL);
|
"contact.txt", NULL);
|
||||||
free(jobfam_dir);
|
|
||||||
unlink(contact_path);
|
unlink(contact_path);
|
||||||
free(contact_path);
|
free(contact_path);
|
||||||
}
|
}
|
||||||
@ -775,7 +771,6 @@ static int rte_init(void)
|
|||||||
static int rte_finalize(void)
|
static int rte_finalize(void)
|
||||||
{
|
{
|
||||||
char *contact_path;
|
char *contact_path;
|
||||||
char *jobfam_dir;
|
|
||||||
|
|
||||||
if (signals_set) {
|
if (signals_set) {
|
||||||
/* Remove the epipe handler */
|
/* Remove the epipe handler */
|
||||||
@ -816,10 +811,9 @@ static int rte_finalize(void)
|
|||||||
(void) mca_base_framework_close(&opal_pstat_base_framework);
|
(void) mca_base_framework_close(&opal_pstat_base_framework);
|
||||||
|
|
||||||
/* remove my contact info file, if we have session directories */
|
/* remove my contact info file, if we have session directories */
|
||||||
if (NULL != orte_process_info.job_session_dir) {
|
if (NULL != orte_process_info.jobfam_session_dir) {
|
||||||
jobfam_dir = opal_dirname(orte_process_info.job_session_dir);
|
contact_path = opal_os_path(false, orte_process_info.jobfam_session_dir,
|
||||||
contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL);
|
"contact.txt", NULL);
|
||||||
free(jobfam_dir);
|
|
||||||
unlink(contact_path);
|
unlink(contact_path);
|
||||||
free(contact_path);
|
free(contact_path);
|
||||||
}
|
}
|
||||||
|
@ -242,6 +242,43 @@ static int rte_init(void)
|
|||||||
free(string_key);
|
free(string_key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* retrieve temp directories info */
|
||||||
|
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TMPDIR, &wildcard_rank, &val, OPAL_STRING);
|
||||||
|
if (OPAL_SUCCESS == ret && NULL != val) {
|
||||||
|
/* TODO: who has precedence - the PMIx or the MCA setting? */
|
||||||
|
if( NULL == orte_process_info.top_session_dir ){
|
||||||
|
orte_process_info.top_session_dir = val;
|
||||||
|
} else {
|
||||||
|
/* keep the MCA setting */
|
||||||
|
free(val);
|
||||||
|
}
|
||||||
|
val = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_NSDIR, &wildcard_rank, &val, OPAL_STRING);
|
||||||
|
if (OPAL_SUCCESS == ret && NULL != val) {
|
||||||
|
/* TODO: who has precedence - the PMIx or the MCA setting? */
|
||||||
|
if( NULL == orte_process_info.job_session_dir ){
|
||||||
|
orte_process_info.job_session_dir = val;
|
||||||
|
} else {
|
||||||
|
/* keep the MCA setting */
|
||||||
|
free(val);
|
||||||
|
}
|
||||||
|
val = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_PROCDIR, &wildcard_rank, &val, OPAL_STRING);
|
||||||
|
if (OPAL_SUCCESS == ret && NULL != val) {
|
||||||
|
/* TODO: who has precedence - the PMIx or the MCA setting? */
|
||||||
|
if( NULL == orte_process_info.proc_session_dir ){
|
||||||
|
orte_process_info.proc_session_dir = val;
|
||||||
|
} else {
|
||||||
|
/* keep the MCA setting */
|
||||||
|
free(val);
|
||||||
|
}
|
||||||
|
val = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
/* retrieve our topology */
|
/* retrieve our topology */
|
||||||
val = NULL;
|
val = NULL;
|
||||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO,
|
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO,
|
||||||
|
@ -105,6 +105,17 @@ static void recv_ack(int status, orte_process_name_t* sender,
|
|||||||
void* cbdata);
|
void* cbdata);
|
||||||
static void write_handler(int fd, short event, void *cbdata);
|
static void write_handler(int fd, short event, void *cbdata);
|
||||||
|
|
||||||
|
static char *filem_session_dir()
|
||||||
|
{
|
||||||
|
char *session_dir = orte_process_info.jobfam_session_dir;
|
||||||
|
if( NULL == session_dir ){
|
||||||
|
/* if no job family session dir was provided -
|
||||||
|
* use the job session dir */
|
||||||
|
session_dir = orte_process_info.job_session_dir;
|
||||||
|
}
|
||||||
|
return session_dir;
|
||||||
|
}
|
||||||
|
|
||||||
static int raw_init(void)
|
static int raw_init(void)
|
||||||
{
|
{
|
||||||
OBJ_CONSTRUCT(&incoming_files, opal_list_t);
|
OBJ_CONSTRUCT(&incoming_files, opal_list_t);
|
||||||
@ -657,25 +668,26 @@ static int create_link(char *my_dir, char *path,
|
|||||||
static int raw_link_local_files(orte_job_t *jdata,
|
static int raw_link_local_files(orte_job_t *jdata,
|
||||||
orte_app_context_t *app)
|
orte_app_context_t *app)
|
||||||
{
|
{
|
||||||
char *my_dir, *path=NULL;
|
char *session_dir, *path=NULL;
|
||||||
orte_proc_t *proc;
|
orte_proc_t *proc;
|
||||||
char *prefix;
|
|
||||||
int i, j, rc;
|
int i, j, rc;
|
||||||
orte_filem_raw_incoming_t *inbnd;
|
orte_filem_raw_incoming_t *inbnd;
|
||||||
opal_list_item_t *item;
|
opal_list_item_t *item;
|
||||||
char **files=NULL, *bname, *filestring;
|
char **files=NULL, *bname, *filestring;
|
||||||
|
|
||||||
/* check my session directory for files I have received and
|
/* check my jobfam session directory for files I have received and
|
||||||
* symlink them to the proc-level session directory of each
|
* symlink them to the proc-level session directory of each
|
||||||
* local process in the job
|
* local process in the job
|
||||||
|
*
|
||||||
|
* TODO: @rhc - please check that I've correctly interpreted your
|
||||||
|
* intention here
|
||||||
*/
|
*/
|
||||||
my_dir = opal_dirname(orte_process_info.job_session_dir);
|
session_dir = filem_session_dir();
|
||||||
|
if( NULL == session_dir){
|
||||||
/* setup */
|
/* we were unable to find any suitable directory */
|
||||||
if (NULL != orte_process_info.tmpdir_base) {
|
rc = ORTE_ERR_BAD_PARAM;
|
||||||
prefix = strdup(orte_process_info.tmpdir_base);
|
ORTE_ERROR_LOG(rc);
|
||||||
} else {
|
return rc;
|
||||||
prefix = NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* get the list of files this app wants */
|
/* get the list of files this app wants */
|
||||||
@ -692,10 +704,6 @@ static int raw_link_local_files(orte_job_t *jdata,
|
|||||||
|
|
||||||
/* if there are no files to link, then ignore this */
|
/* if there are no files to link, then ignore this */
|
||||||
if (NULL == files) {
|
if (NULL == files) {
|
||||||
free(my_dir);
|
|
||||||
if (NULL != prefix) {
|
|
||||||
free(prefix);
|
|
||||||
}
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -736,10 +744,8 @@ static int raw_link_local_files(orte_job_t *jdata,
|
|||||||
ORTE_NAME_PRINT(&proc->name)));
|
ORTE_NAME_PRINT(&proc->name)));
|
||||||
|
|
||||||
/* get the session dir name in absolute form */
|
/* get the session dir name in absolute form */
|
||||||
path = NULL;
|
path = orte_process_info.proc_session_dir;
|
||||||
rc = orte_session_dir_get_name(&path, &prefix, NULL,
|
|
||||||
orte_process_info.nodename,
|
|
||||||
&proc->name);
|
|
||||||
/* create it, if it doesn't already exist */
|
/* create it, if it doesn't already exist */
|
||||||
if (OPAL_SUCCESS != (rc = opal_os_dirpath_create(path, S_IRWXU))) {
|
if (OPAL_SUCCESS != (rc = opal_os_dirpath_create(path, S_IRWXU))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
@ -747,11 +753,6 @@ static int raw_link_local_files(orte_job_t *jdata,
|
|||||||
* create it - either way, we are done
|
* create it - either way, we are done
|
||||||
*/
|
*/
|
||||||
free(files);
|
free(files);
|
||||||
if (NULL != prefix) {
|
|
||||||
free(prefix);
|
|
||||||
}
|
|
||||||
free(path);
|
|
||||||
free(my_dir);
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -775,13 +776,8 @@ static int raw_link_local_files(orte_job_t *jdata,
|
|||||||
inbnd->file));
|
inbnd->file));
|
||||||
/* cycle thru the link points and create symlinks to them */
|
/* cycle thru the link points and create symlinks to them */
|
||||||
for (j=0; NULL != inbnd->link_pts[j]; j++) {
|
for (j=0; NULL != inbnd->link_pts[j]; j++) {
|
||||||
if (ORTE_SUCCESS != (rc = create_link(my_dir, path, inbnd->link_pts[j]))) {
|
if (ORTE_SUCCESS != (rc = create_link(session_dir, path, inbnd->link_pts[j]))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
free(my_dir);
|
|
||||||
free(path);
|
|
||||||
if (NULL != prefix) {
|
|
||||||
free(prefix);
|
|
||||||
}
|
|
||||||
free(files);
|
free(files);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
@ -796,13 +792,8 @@ static int raw_link_local_files(orte_job_t *jdata,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(path);
|
|
||||||
}
|
}
|
||||||
opal_argv_free(files);
|
opal_argv_free(files);
|
||||||
if (NULL != prefix) {
|
|
||||||
free(prefix);
|
|
||||||
}
|
|
||||||
free(my_dir);
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -999,7 +990,7 @@ static void recv_files(int status, orte_process_name_t* sender,
|
|||||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||||
void* cbdata)
|
void* cbdata)
|
||||||
{
|
{
|
||||||
char *file, *jobfam_dir;
|
char *file, *session_dir;
|
||||||
int32_t nchunk, n, nbytes;
|
int32_t nchunk, n, nbytes;
|
||||||
unsigned char data[ORTE_FILEM_RAW_CHUNK_MAX];
|
unsigned char data[ORTE_FILEM_RAW_CHUNK_MAX];
|
||||||
int rc;
|
int rc;
|
||||||
@ -1086,9 +1077,9 @@ static void recv_files(int status, orte_process_name_t* sender,
|
|||||||
incoming->top = strdup(tmp);
|
incoming->top = strdup(tmp);
|
||||||
free(tmp);
|
free(tmp);
|
||||||
/* define the full path to where we will put it */
|
/* define the full path to where we will put it */
|
||||||
jobfam_dir = opal_dirname(orte_process_info.job_session_dir);
|
session_dir = filem_session_dir();
|
||||||
incoming->fullpath = opal_os_path(false, jobfam_dir, file, NULL);
|
|
||||||
free(jobfam_dir);
|
incoming->fullpath = opal_os_path(false, session_dir, file, NULL);
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((1, orte_filem_base_framework.framework_output,
|
||||||
"%s filem:raw: opening target file %s",
|
"%s filem:raw: opening target file %s",
|
||||||
|
@ -164,8 +164,7 @@ static int component_available(void)
|
|||||||
|
|
||||||
/* if session directories were forbidden, then we cannot be used */
|
/* if session directories were forbidden, then we cannot be used */
|
||||||
if (!orte_create_session_dirs ||
|
if (!orte_create_session_dirs ||
|
||||||
NULL == orte_process_info.tmpdir_base ||
|
NULL == orte_process_info.jobfam_session_dir ) {
|
||||||
NULL == orte_process_info.top_session_dir) {
|
|
||||||
return ORTE_ERR_NOT_SUPPORTED;
|
return ORTE_ERR_NOT_SUPPORTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -216,9 +215,7 @@ static int component_startup(void)
|
|||||||
/* setup the path to the daemon rendezvous point */
|
/* setup the path to the daemon rendezvous point */
|
||||||
memset(&mca_oob_usock_component.address, 0, sizeof(struct sockaddr_un));
|
memset(&mca_oob_usock_component.address, 0, sizeof(struct sockaddr_un));
|
||||||
mca_oob_usock_component.address.sun_family = AF_UNIX;
|
mca_oob_usock_component.address.sun_family = AF_UNIX;
|
||||||
session = opal_os_path(false, orte_process_info.tmpdir_base,
|
session = opal_os_path(false, orte_process_info.jobfam_session_dir,
|
||||||
orte_process_info.top_session_dir,
|
|
||||||
orte_process_info.jobfam_session_dir,
|
|
||||||
"usock", NULL);
|
"usock", NULL);
|
||||||
if ((strlen(session) + 1) > sizeof(mca_oob_usock_component.address.sun_path)-1) {
|
if ((strlen(session) + 1) > sizeof(mca_oob_usock_component.address.sun_path)-1) {
|
||||||
opal_output(0, "SESSION DIR TOO LONG");
|
opal_output(0, "SESSION DIR TOO LONG");
|
||||||
|
@ -943,6 +943,7 @@ static int setup_fork(orte_job_t *jdata,
|
|||||||
|
|
||||||
/* forcibly set the local tmpdir base and top session dir to match ours */
|
/* forcibly set the local tmpdir base and top session dir to match ours */
|
||||||
opal_setenv("OMPI_MCA_orte_tmpdir_base", orte_process_info.tmpdir_base, true, &app->env);
|
opal_setenv("OMPI_MCA_orte_tmpdir_base", orte_process_info.tmpdir_base, true, &app->env);
|
||||||
|
/* TODO: should we use PMIx key to pass this data? */
|
||||||
opal_setenv("OMPI_MCA_orte_top_session_dir", orte_process_info.top_session_dir, true, &app->env);
|
opal_setenv("OMPI_MCA_orte_top_session_dir", orte_process_info.top_session_dir, true, &app->env);
|
||||||
opal_setenv("OMPI_MCA_orte_jobfam_session_dir", orte_process_info.jobfam_session_dir, true, &app->env);
|
opal_setenv("OMPI_MCA_orte_jobfam_session_dir", orte_process_info.jobfam_session_dir, true, &app->env);
|
||||||
|
|
||||||
@ -1102,24 +1103,8 @@ static int setup_child(orte_job_t *jdata,
|
|||||||
ORTE_FLAG_SET(child, ORTE_PROC_FLAG_IOF_COMPLETE);
|
ORTE_FLAG_SET(child, ORTE_PROC_FLAG_IOF_COMPLETE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* construct the proc's session dir name */
|
|
||||||
if (NULL != orte_process_info.tmpdir_base) {
|
|
||||||
value = strdup(orte_process_info.tmpdir_base);
|
|
||||||
} else {
|
|
||||||
value = NULL;
|
|
||||||
}
|
|
||||||
param = NULL;
|
|
||||||
if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(¶m, &value, NULL,
|
|
||||||
orte_process_info.nodename,
|
|
||||||
&child->name))) {
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
if (NULL != value) {
|
|
||||||
free(value);
|
|
||||||
}
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
free(value);
|
|
||||||
/* pass an envar so the proc can find any files it had prepositioned */
|
/* pass an envar so the proc can find any files it had prepositioned */
|
||||||
|
param = orte_process_info.proc_session_dir;
|
||||||
opal_setenv("OMPI_FILE_LOCATION", param, true, &app->env);
|
opal_setenv("OMPI_FILE_LOCATION", param, true, &app->env);
|
||||||
|
|
||||||
/* if the user wanted the cwd to be the proc's session dir, then
|
/* if the user wanted the cwd to be the proc's session dir, then
|
||||||
@ -1132,12 +1117,10 @@ static int setup_child(orte_job_t *jdata,
|
|||||||
/* doesn't exist with correct permissions, and/or we can't
|
/* doesn't exist with correct permissions, and/or we can't
|
||||||
* create it - either way, we are done
|
* create it - either way, we are done
|
||||||
*/
|
*/
|
||||||
free(param);
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
/* change to it */
|
/* change to it */
|
||||||
if (0 != chdir(param)) {
|
if (0 != chdir(param)) {
|
||||||
free(param);
|
|
||||||
return ORTE_ERROR;
|
return ORTE_ERROR;
|
||||||
}
|
}
|
||||||
/* It seems that chdir doesn't
|
/* It seems that chdir doesn't
|
||||||
@ -1154,6 +1137,5 @@ static int setup_child(orte_job_t *jdata,
|
|||||||
/* update the initial wdir value too */
|
/* update the initial wdir value too */
|
||||||
opal_setenv("OMPI_MCA_initial_wdir", param, true, &app->env);
|
opal_setenv("OMPI_MCA_initial_wdir", param, true, &app->env);
|
||||||
}
|
}
|
||||||
free(param);
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -2132,7 +2132,8 @@ static void orte_debugger_init_before_spawn(orte_job_t *jdata)
|
|||||||
/* create the attachment FIFO and setup readevent - cannot be
|
/* create the attachment FIFO and setup readevent - cannot be
|
||||||
* done if no session dirs exist!
|
* done if no session dirs exist!
|
||||||
*/
|
*/
|
||||||
attach_fifo = opal_os_path(false, orte_process_info.job_session_dir, "debugger_attach_fifo", NULL);
|
attach_fifo = opal_os_path(false, orte_process_info.job_session_dir,
|
||||||
|
"debugger_attach_fifo", NULL);
|
||||||
if ((mkfifo(attach_fifo, FILE_MODE) < 0) && errno != EEXIST) {
|
if ((mkfifo(attach_fifo, FILE_MODE) < 0) && errno != EEXIST) {
|
||||||
opal_output(0, "CANNOT CREATE FIFO %s: errno %d", attach_fifo, errno);
|
opal_output(0, "CANNOT CREATE FIFO %s: errno %d", attach_fifo, errno);
|
||||||
free(attach_fifo);
|
free(attach_fifo);
|
||||||
|
@ -262,9 +262,7 @@ int pmix_server_init(void)
|
|||||||
kv = OBJ_NEW(opal_value_t);
|
kv = OBJ_NEW(opal_value_t);
|
||||||
kv->key = strdup(OPAL_PMIX_SERVER_TMPDIR);
|
kv->key = strdup(OPAL_PMIX_SERVER_TMPDIR);
|
||||||
kv->type = OPAL_STRING;
|
kv->type = OPAL_STRING;
|
||||||
kv->data.string = opal_os_path(false, orte_process_info.tmpdir_base,
|
kv->data.string = opal_os_path(false, orte_process_info.jobfam_session_dir, NULL);
|
||||||
orte_process_info.top_session_dir,
|
|
||||||
orte_process_info.jobfam_session_dir, NULL);
|
|
||||||
opal_list_append(&info, &kv->super);
|
opal_list_append(&info, &kv->super);
|
||||||
/* use the same for the system temp directory - this is
|
/* use the same for the system temp directory - this is
|
||||||
* where the system-level tool connections will go */
|
* where the system-level tool connections will go */
|
||||||
|
@ -310,7 +310,7 @@ static int orte_cr_coord_post_restart(void) {
|
|||||||
* Add the previous session directory for cleanup
|
* Add the previous session directory for cleanup
|
||||||
*/
|
*/
|
||||||
opal_crs_base_cleanup_append(orte_process_info.job_session_dir, true);
|
opal_crs_base_cleanup_append(orte_process_info.job_session_dir, true);
|
||||||
tmp_dir = opal_dirname(orte_process_info.job_session_dir);
|
tmp_dir = orte_process_info.jobfam_session_dir;
|
||||||
if( NULL != tmp_dir ) {
|
if( NULL != tmp_dir ) {
|
||||||
opal_crs_base_cleanup_append(tmp_dir, true);
|
opal_crs_base_cleanup_append(tmp_dir, true);
|
||||||
free(tmp_dir);
|
free(tmp_dir);
|
||||||
|
@ -182,7 +182,7 @@ int orte_list_local_hnps(opal_list_t *hnps, bool connect)
|
|||||||
/*
|
/*
|
||||||
* Check to make sure we have access to the top-level directory
|
* Check to make sure we have access to the top-level directory
|
||||||
*/
|
*/
|
||||||
headdir = opal_os_path(false, orte_process_info.tmpdir_base, orte_process_info.top_session_dir, NULL);
|
headdir = orte_process_info.top_session_dir;
|
||||||
|
|
||||||
if( ORTE_SUCCESS != (ret = opal_os_dirpath_access(headdir, 0) )) {
|
if( ORTE_SUCCESS != (ret = opal_os_dirpath_access(headdir, 0) )) {
|
||||||
/* it is okay not to find this as there may not be any
|
/* it is okay not to find this as there may not be any
|
||||||
@ -231,7 +231,6 @@ int orte_list_local_hnps(opal_list_t *hnps, bool connect)
|
|||||||
cleanup:
|
cleanup:
|
||||||
if( NULL != cur_dirp )
|
if( NULL != cur_dirp )
|
||||||
closedir(cur_dirp);
|
closedir(cur_dirp);
|
||||||
free(headdir);
|
|
||||||
|
|
||||||
return (opal_list_is_empty(hnps) ? ORTE_ERR_NOT_FOUND : ORTE_SUCCESS);
|
return (opal_list_is_empty(hnps) ? ORTE_ERR_NOT_FOUND : ORTE_SUCCESS);
|
||||||
}
|
}
|
||||||
|
@ -108,6 +108,14 @@ int orte_proc_info(void)
|
|||||||
if (init) {
|
if (init) {
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
int delay = 0;
|
||||||
|
while( delay ){
|
||||||
|
sleep(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
init = true;
|
init = true;
|
||||||
|
|
||||||
OBJ_CONSTRUCT(&orte_process_info.super, opal_proc_t);
|
OBJ_CONSTRUCT(&orte_process_info.super, opal_proc_t);
|
||||||
|
@ -73,10 +73,6 @@ static int orte_create_dir(char *directory);
|
|||||||
|
|
||||||
static bool orte_dir_check_file(const char *root, const char *path);
|
static bool orte_dir_check_file(const char *root, const char *path);
|
||||||
|
|
||||||
static char *orte_build_job_session_dir(char *top_dir,
|
|
||||||
orte_process_name_t *proc,
|
|
||||||
orte_jobid_t jobid);
|
|
||||||
|
|
||||||
#define OMPI_PRINTF_FIX_STRING(a) ((NULL == a) ? "(null)" : a)
|
#define OMPI_PRINTF_FIX_STRING(a) ((NULL == a) ? "(null)" : a)
|
||||||
|
|
||||||
/****************************
|
/****************************
|
||||||
@ -112,175 +108,186 @@ static int orte_create_dir(char *directory)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Construct the fullpath to the session directory - it
|
static int _setup_tmpdir_base()
|
||||||
* will consist of "ompi.<hostname>.<effective-uid>", and
|
{
|
||||||
* have subdirs:
|
int rc = ORTE_SUCCESS;
|
||||||
*
|
|
||||||
* pid - the pid of the mpirun that oversees this job. Note
|
/* make sure that we have tmpdir_base set
|
||||||
* that direct-launched processes will have manufactured
|
* if we need it
|
||||||
* this value
|
|
||||||
*
|
|
||||||
* jobid - jobid of the application being executed
|
|
||||||
*
|
|
||||||
* vpid - vpid of the process
|
|
||||||
*/
|
*/
|
||||||
int
|
if (NULL == orte_process_info.tmpdir_base) {
|
||||||
orte_session_dir_get_name(char **fulldirpath,
|
orte_process_info.tmpdir_base =
|
||||||
char **return_prefix, /* This will come back as the valid tmp dir */
|
strdup(opal_tmp_directory());
|
||||||
char **return_frontend,
|
if (NULL == orte_process_info.tmpdir_base) {
|
||||||
char *hostid,
|
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
orte_process_name_t *proc) {
|
goto exit;
|
||||||
char *hostname = NULL,
|
}
|
||||||
*sessions = NULL,
|
}
|
||||||
*prefix = NULL,
|
exit:
|
||||||
*frontend = NULL,
|
if( ORTE_SUCCESS != rc ){
|
||||||
*jobfam = NULL,
|
ORTE_ERROR_LOG(rc);
|
||||||
*job = NULL,
|
}
|
||||||
*vpidstr = NULL;
|
return rc;
|
||||||
bool prefix_provided = false;
|
}
|
||||||
int exit_status = ORTE_SUCCESS;
|
|
||||||
size_t len;
|
static int _setup_top_session_dir()
|
||||||
uid_t uid;
|
{
|
||||||
|
int rc = ORTE_SUCCESS;
|
||||||
|
/* get the effective uid */
|
||||||
|
uid_t uid = geteuid();
|
||||||
|
|
||||||
|
/* construct the top_session_dir if we need */
|
||||||
|
if (NULL == orte_process_info.top_session_dir) {
|
||||||
|
if (ORTE_SUCCESS != (rc = _setup_tmpdir_base())) {
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
if( NULL == orte_process_info.nodename ||
|
||||||
|
NULL == orte_process_info.tmpdir_base ){
|
||||||
|
/* we can't setup top session dir */
|
||||||
|
rc = ORTE_ERR_BAD_PARAM;
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 > asprintf(&orte_process_info.top_session_dir,
|
||||||
|
"%s/ompi.%s.%lu", orte_process_info.tmpdir_base,
|
||||||
|
orte_process_info.nodename, (unsigned long)uid)) {
|
||||||
|
orte_process_info.top_session_dir = NULL;
|
||||||
|
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
exit:
|
||||||
|
if( ORTE_SUCCESS != rc ){
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
}
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Fill in orte_process_info.jobfam_session_dir if it is not already set.
 *
 * The top-level session directory name is set up first; the job family
 * directory name is then built beneath it:
 *   - "<top>/pid.<pid>"    when ORTE_PROC_IS_HNP
 *   - "<top>/jobfam"       when proc is NULL or its jobid is ORTE_JOBID_INVALID
 *   - "<top>/jf.<jobfam>"  otherwise
 *
 * Only the name is constructed here; this function creates nothing on disk.
 *
 * @param proc  process name used to derive the job family; may be NULL
 * @return ORTE_SUCCESS, the error returned by _setup_top_session_dir(), or
 *         ORTE_ERR_OUT_OF_RESOURCE if the name could not be allocated (in
 *         which case jobfam_session_dir is left NULL)
 */
static int _setup_jobfam_session_dir(orte_process_name_t *proc)
{
    int rc = ORTE_SUCCESS;
    int len;

    /* nothing to do if the job family session dir is already defined */
    if (NULL != orte_process_info.jobfam_session_dir) {
        return ORTE_SUCCESS;
    }

    /* the job family dir lives directly under the top session dir */
    if (ORTE_SUCCESS != (rc = _setup_top_session_dir())) {
        return rc;
    }

    if (ORTE_PROC_IS_HNP) {
        len = asprintf(&orte_process_info.jobfam_session_dir,
                       "%s/pid.%lu", orte_process_info.top_session_dir,
                       (unsigned long)orte_process_info.pid);
    } else if (NULL == proc || (ORTE_JOBID_INVALID == proc->jobid)) {
        /* we were not given a usable jobid, so use a generic name */
        len = asprintf(&orte_process_info.jobfam_session_dir,
                       "%s/jobfam", orte_process_info.top_session_dir);
    } else {
        len = asprintf(&orte_process_info.jobfam_session_dir,
                       "%s/jf.%d", orte_process_info.top_session_dir,
                       ORTE_JOB_FAMILY(proc->jobid));
    }

    if (0 > len) {
        /* the pointer's value is unspecified after a failed asprintf():
         * reset it on every failure path so later NULL checks stay valid */
        orte_process_info.jobfam_session_dir = NULL;
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
    }
    return rc;
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
_setup_job_session_dir(orte_process_name_t *proc)
|
||||||
|
{
|
||||||
|
int rc = ORTE_SUCCESS;
|
||||||
|
|
||||||
|
/* construct the top_session_dir if we need */
|
||||||
|
if( NULL == orte_process_info.job_session_dir ){
|
||||||
|
if( ORTE_SUCCESS != (rc = _setup_jobfam_session_dir(proc)) ){
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
if (ORTE_JOBID_INVALID != proc->jobid) {
|
||||||
|
if (0 > asprintf(&orte_process_info.job_session_dir,
|
||||||
|
"%s/%d", orte_process_info.jobfam_session_dir,
|
||||||
|
ORTE_LOCAL_JOBID(proc->jobid))) {
|
||||||
|
orte_process_info.job_session_dir = NULL;
|
||||||
|
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
orte_process_info.job_session_dir = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
exit:
|
||||||
|
if( ORTE_SUCCESS != rc ){
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
}
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
_setup_proc_session_dir(orte_process_name_t *proc)
|
||||||
|
{
|
||||||
|
int rc = ORTE_SUCCESS;
|
||||||
|
|
||||||
|
/* construct the top_session_dir if we need */
|
||||||
|
if( NULL == orte_process_info.proc_session_dir ){
|
||||||
|
if( ORTE_SUCCESS != (rc = _setup_job_session_dir(proc)) ){
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
if (ORTE_VPID_INVALID != proc->vpid) {
|
||||||
|
if (0 > asprintf(&orte_process_info.proc_session_dir,
|
||||||
|
"%s/%d", orte_process_info.job_session_dir,
|
||||||
|
proc->vpid)) {
|
||||||
|
orte_process_info.proc_session_dir = NULL;
|
||||||
|
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
orte_process_info.proc_session_dir = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
exit:
|
||||||
|
if( ORTE_SUCCESS != rc ){
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
}
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
int orte_session_setup_base(orte_process_name_t *proc)
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
|
||||||
/* Ensure that system info is set */
|
/* Ensure that system info is set */
|
||||||
orte_proc_info();
|
orte_proc_info();
|
||||||
|
|
||||||
/* get the effective uid */
|
/* setup job and proc session directories */
|
||||||
uid = geteuid();
|
if( ORTE_SUCCESS != (rc = _setup_job_session_dir(proc)) ){
|
||||||
|
return rc;
|
||||||
/*
|
|
||||||
* set the 'hostname'
|
|
||||||
*/
|
|
||||||
if( NULL != hostid) { /* User specified version */
|
|
||||||
hostname = strdup(hostid);
|
|
||||||
}
|
|
||||||
else { /* check if it is set elsewhere */
|
|
||||||
if( NULL != orte_process_info.nodename)
|
|
||||||
hostname = strdup(orte_process_info.nodename);
|
|
||||||
else {
|
|
||||||
/* Couldn't find it, so fail */
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
|
||||||
exit_status = ORTE_ERR_BAD_PARAM;
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* construct the frontend of the session directory*/
|
if( ORTE_SUCCESS != (rc = _setup_proc_session_dir(proc)) ){
|
||||||
if (NULL != orte_process_info.top_session_dir) {
|
return rc;
|
||||||
frontend = strdup(orte_process_info.top_session_dir);
|
|
||||||
} else { /* If not set then construct it */
|
|
||||||
if (0 > asprintf(&frontend, "ompi.%s.%lu", hostname, (unsigned long)uid)) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
||||||
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* construct the next level down, which belongs to the
|
|
||||||
* job family. This is related to the mpirun that launched
|
|
||||||
* the job, or is an arbitrary (agreed upon) value if
|
|
||||||
* direct launched */
|
|
||||||
if (ORTE_PROC_IS_HNP) {
|
|
||||||
if (0 > asprintf(&jobfam, "pid.%lu", (unsigned long)orte_process_info.pid)) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
||||||
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
orte_process_info.jobfam_session_dir = strdup(jobfam);
|
|
||||||
} else if (NULL != orte_process_info.jobfam_session_dir) {
|
|
||||||
/* we had a job family session dir passed down to us by mpirun */
|
|
||||||
jobfam = strdup(orte_process_info.jobfam_session_dir);
|
|
||||||
} else {
|
|
||||||
/* we were not given one, so define it */
|
|
||||||
if (NULL == proc) {
|
|
||||||
jobfam = strdup("jobfam");
|
|
||||||
} else {
|
|
||||||
if (0 > asprintf(&jobfam, "jf.%d", ORTE_JOB_FAMILY(proc->jobid))) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
||||||
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
orte_process_info.jobfam_session_dir = strdup(jobfam);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Construct the session directory
|
|
||||||
*/
|
|
||||||
/* If we were given a valid vpid then we can construct it fully */
|
|
||||||
if( NULL != proc) {
|
|
||||||
if (ORTE_VPID_INVALID != proc->vpid) {
|
|
||||||
if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
||||||
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ORTE_SUCCESS != orte_util_convert_vpid_to_string(&vpidstr, proc->vpid)) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
||||||
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
sessions = opal_os_path(false, frontend, jobfam, job, vpidstr, NULL);
|
|
||||||
if( NULL == sessions ) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERROR);
|
|
||||||
exit_status = ORTE_ERROR;
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
} else if (ORTE_JOBID_INVALID != proc->jobid) {
|
|
||||||
if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(proc->jobid))) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
||||||
exit_status = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
sessions = opal_os_path( false, frontend, jobfam, job, NULL );
|
|
||||||
if( NULL == sessions ) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERROR);
|
|
||||||
exit_status = ORTE_ERROR;
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sessions = strdup(frontend); /* must dup this to avoid double-free later */
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
/* If we were not given a proc at all, then we just set it to frontend */
|
|
||||||
sessions = strdup(frontend); /* must dup this to avoid double-free later */
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If the user specified an invalid prefix, or no prefix at all
|
|
||||||
* we need to keep looking
|
|
||||||
*/
|
|
||||||
if( NULL != fulldirpath && NULL != *fulldirpath) {
|
|
||||||
free(*fulldirpath);
|
|
||||||
*fulldirpath = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( NULL != return_prefix && NULL != *return_prefix) { /* use the user specified one, if available */
|
|
||||||
prefix = strdup(*return_prefix);
|
|
||||||
prefix_provided = true;
|
|
||||||
}
|
|
||||||
/* Try to find a proper alternative prefix */
|
|
||||||
else if (NULL != orte_process_info.tmpdir_base) { /* stored value */
|
|
||||||
prefix = strdup(orte_process_info.tmpdir_base);
|
|
||||||
}
|
|
||||||
else { /* General Environment var */
|
|
||||||
prefix = strdup(opal_tmp_directory());
|
|
||||||
}
|
|
||||||
len = strlen(prefix);
|
|
||||||
/* check for a trailing path separator */
|
|
||||||
if (OPAL_PATH_SEP[0] == prefix[len-1]) {
|
|
||||||
prefix[len-1] = '\0';
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* BEFORE doing anything else, check to see if this prefix is
|
/* BEFORE doing anything else, check to see if this prefix is
|
||||||
* allowed by the system
|
* allowed by the system
|
||||||
*/
|
*/
|
||||||
if (NULL != orte_prohibited_session_dirs) {
|
if (NULL != orte_prohibited_session_dirs ||
|
||||||
|
NULL != orte_process_info.tmpdir_base ) {
|
||||||
char **list;
|
char **list;
|
||||||
int i, len;
|
int i, len;
|
||||||
/* break the string into tokens - it should be
|
/* break the string into tokens - it should be
|
||||||
@ -291,97 +298,36 @@ orte_session_dir_get_name(char **fulldirpath,
|
|||||||
/* cycle through the list */
|
/* cycle through the list */
|
||||||
for (i=0; i < len; i++) {
|
for (i=0; i < len; i++) {
|
||||||
/* check if prefix matches */
|
/* check if prefix matches */
|
||||||
if (0 == strncmp(prefix, list[i], strlen(list[i]))) {
|
if (0 == strncmp(orte_process_info.tmpdir_base, list[i], strlen(list[i]))) {
|
||||||
/* this is a prohibited location */
|
/* this is a prohibited location */
|
||||||
orte_show_help("help-orte-runtime.txt",
|
orte_show_help("help-orte-runtime.txt",
|
||||||
"orte:session:dir:prohibited",
|
"orte:session:dir:prohibited",
|
||||||
true, prefix, orte_prohibited_session_dirs);
|
true, orte_process_info.tmpdir_base,
|
||||||
|
orte_prohibited_session_dirs);
|
||||||
opal_argv_free(list);
|
opal_argv_free(list);
|
||||||
free(prefix);
|
|
||||||
free(sessions);
|
|
||||||
free(hostname);
|
|
||||||
free(frontend);
|
|
||||||
return ORTE_ERR_FATAL;
|
return ORTE_ERR_FATAL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
opal_argv_free(list); /* done with this */
|
opal_argv_free(list); /* done with this */
|
||||||
}
|
}
|
||||||
/*
|
return ORTE_SUCCESS;
|
||||||
* Construct the absolute final path, if requested
|
|
||||||
*/
|
|
||||||
if (NULL != fulldirpath) {
|
|
||||||
*fulldirpath = opal_os_path(false, prefix, sessions, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Return the frontend and prefix, if user requested we do so
|
|
||||||
*/
|
|
||||||
if (NULL != return_frontend) {
|
|
||||||
*return_frontend = strdup(frontend);
|
|
||||||
}
|
|
||||||
if (!prefix_provided && NULL != return_prefix) {
|
|
||||||
*return_prefix = strdup(prefix);
|
|
||||||
}
|
|
||||||
|
|
||||||
cleanup:
|
|
||||||
if(NULL != hostname) {
|
|
||||||
free(hostname);
|
|
||||||
}
|
|
||||||
if(NULL != sessions) {
|
|
||||||
free(sessions);
|
|
||||||
}
|
|
||||||
if (NULL != prefix) {
|
|
||||||
free(prefix);
|
|
||||||
}
|
|
||||||
if (NULL != frontend) {
|
|
||||||
free(frontend);
|
|
||||||
}
|
|
||||||
if (NULL != jobfam) {
|
|
||||||
free(jobfam);
|
|
||||||
}
|
|
||||||
if (NULL != job) {
|
|
||||||
free(job);
|
|
||||||
}
|
|
||||||
if (NULL != vpidstr) {
|
|
||||||
free(vpidstr);
|
|
||||||
}
|
|
||||||
|
|
||||||
return exit_status;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Construct the session directory and create it if necessary
|
* Construct the session directory and create it if necessary
|
||||||
*/
|
*/
|
||||||
int orte_session_dir(bool create,
|
int orte_session_dir(bool create, orte_process_name_t *proc)
|
||||||
char *prefix, char *hostid,
|
|
||||||
orte_process_name_t *proc)
|
|
||||||
{
|
{
|
||||||
char *fulldirpath = NULL,
|
|
||||||
*frontend = NULL,
|
|
||||||
*sav = NULL;
|
|
||||||
int rc = ORTE_SUCCESS;
|
int rc = ORTE_SUCCESS;
|
||||||
char *local_prefix = NULL;
|
|
||||||
|
|
||||||
/* use the specified prefix, if one was given */
|
|
||||||
if (NULL != prefix) {
|
|
||||||
local_prefix = strdup(prefix);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get the session directory full name
|
* Get the session directory full name
|
||||||
*/
|
*/
|
||||||
if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(&fulldirpath,
|
if (ORTE_SUCCESS != (rc = orte_session_setup_base(proc))) {
|
||||||
&local_prefix,
|
|
||||||
&frontend,
|
|
||||||
hostid,
|
|
||||||
proc))) {
|
|
||||||
if (ORTE_ERR_FATAL == rc) {
|
if (ORTE_ERR_FATAL == rc) {
|
||||||
/* this indicates we should abort quietly */
|
/* this indicates we should abort quietly */
|
||||||
rc = ORTE_ERR_SILENT;
|
rc = ORTE_ERR_SILENT;
|
||||||
goto cleanup;
|
|
||||||
}
|
}
|
||||||
/* otherwise, bark a little first */
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -389,56 +335,19 @@ int orte_session_dir(bool create,
|
|||||||
* Now that we have the full path, go ahead and create it if necessary
|
* Now that we have the full path, go ahead and create it if necessary
|
||||||
*/
|
*/
|
||||||
if( create ) {
|
if( create ) {
|
||||||
if( ORTE_SUCCESS != (rc = orte_create_dir(fulldirpath) ) ) {
|
if( ORTE_SUCCESS != (rc = orte_create_dir(orte_process_info.proc_session_dir)) ) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* update global structure fields */
|
|
||||||
if (NULL != orte_process_info.tmpdir_base) {
|
|
||||||
free(orte_process_info.tmpdir_base);
|
|
||||||
}
|
|
||||||
orte_process_info.tmpdir_base = strdup(local_prefix);
|
|
||||||
if (NULL != orte_process_info.top_session_dir) {
|
|
||||||
free(orte_process_info.top_session_dir);
|
|
||||||
orte_process_info.top_session_dir = NULL;
|
|
||||||
}
|
|
||||||
if (NULL != frontend) {
|
|
||||||
orte_process_info.top_session_dir = strdup(frontend);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Set the process session directory
|
|
||||||
*/
|
|
||||||
if (ORTE_VPID_INVALID != proc->vpid) {
|
|
||||||
if (NULL != orte_process_info.proc_session_dir) {
|
|
||||||
free(orte_process_info.proc_session_dir);
|
|
||||||
}
|
|
||||||
orte_process_info.proc_session_dir = strdup(fulldirpath);
|
|
||||||
|
|
||||||
/* Strip off last part of directory structure */
|
|
||||||
sav = opal_dirname(fulldirpath);
|
|
||||||
free(fulldirpath);
|
|
||||||
fulldirpath = sav;
|
|
||||||
sav = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Set the job session directory
|
|
||||||
*/
|
|
||||||
if (ORTE_JOBID_INVALID != proc->jobid) {
|
|
||||||
if (NULL != orte_process_info.job_session_dir) {
|
|
||||||
free(orte_process_info.job_session_dir);
|
|
||||||
}
|
|
||||||
orte_process_info.job_session_dir = strdup(fulldirpath);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (orte_debug_flag) {
|
if (orte_debug_flag) {
|
||||||
opal_output(0, "procdir: %s",
|
opal_output(0, "procdir: %s",
|
||||||
OMPI_PRINTF_FIX_STRING(orte_process_info.proc_session_dir));
|
OMPI_PRINTF_FIX_STRING(orte_process_info.proc_session_dir));
|
||||||
opal_output(0, "jobdir: %s",
|
opal_output(0, "jobdir: %s",
|
||||||
OMPI_PRINTF_FIX_STRING(orte_process_info.job_session_dir));
|
OMPI_PRINTF_FIX_STRING(orte_process_info.job_session_dir));
|
||||||
|
opal_output(0, "top: %s",
|
||||||
|
OMPI_PRINTF_FIX_STRING(orte_process_info.jobfam_session_dir));
|
||||||
opal_output(0, "top: %s",
|
opal_output(0, "top: %s",
|
||||||
OMPI_PRINTF_FIX_STRING(orte_process_info.top_session_dir));
|
OMPI_PRINTF_FIX_STRING(orte_process_info.top_session_dir));
|
||||||
opal_output(0, "tmp: %s",
|
opal_output(0, "tmp: %s",
|
||||||
@ -446,16 +355,6 @@ int orte_session_dir(bool create,
|
|||||||
}
|
}
|
||||||
|
|
||||||
cleanup:
|
cleanup:
|
||||||
if (NULL != local_prefix) {
|
|
||||||
free(local_prefix);
|
|
||||||
}
|
|
||||||
if(NULL != fulldirpath) {
|
|
||||||
free(fulldirpath);
|
|
||||||
}
|
|
||||||
if(NULL != frontend) {
|
|
||||||
free(frontend);
|
|
||||||
}
|
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -466,16 +365,14 @@ int
|
|||||||
orte_session_dir_cleanup(orte_jobid_t jobid)
|
orte_session_dir_cleanup(orte_jobid_t jobid)
|
||||||
{
|
{
|
||||||
int rc = ORTE_SUCCESS;
|
int rc = ORTE_SUCCESS;
|
||||||
char *tmp = NULL;
|
|
||||||
char *job_session_dir=NULL;
|
|
||||||
|
|
||||||
if (!orte_create_session_dirs ) {
|
if (!orte_create_session_dirs ) {
|
||||||
/* didn't create them */
|
/* we haven't created them */
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (NULL == orte_process_info.tmpdir_base &&
|
if (NULL == orte_process_info.job_session_dir ||
|
||||||
NULL == orte_process_info.top_session_dir) {
|
NULL == orte_process_info.proc_session_dir) {
|
||||||
/* this should never happen - it means we are calling
|
/* this should never happen - it means we are calling
|
||||||
* cleanup *before* properly setting up the session
|
* cleanup *before* properly setting up the session
|
||||||
* dir system. This leaves open the possibility of
|
* dir system. This leaves open the possibility of
|
||||||
@ -486,37 +383,30 @@ orte_session_dir_cleanup(orte_jobid_t jobid)
|
|||||||
goto CLEANUP;
|
goto CLEANUP;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* need to setup the top_session_dir with the prefix */
|
|
||||||
tmp = opal_os_path(false,
|
|
||||||
orte_process_info.tmpdir_base,
|
|
||||||
orte_process_info.top_session_dir, NULL);
|
|
||||||
|
|
||||||
/* we can only blow away session directories for our job family */
|
|
||||||
job_session_dir = orte_build_job_session_dir(tmp, ORTE_PROC_MY_NAME, jobid);
|
|
||||||
if (NULL == job_session_dir) {
|
|
||||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
goto CLEANUP;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* recursively blow the whole session away for our job family,
|
/* recursively blow the whole session away for our job family,
|
||||||
* saving only output files
|
* saving only output files
|
||||||
*/
|
*/
|
||||||
opal_os_dirpath_destroy(job_session_dir, true, orte_dir_check_file);
|
opal_os_dirpath_destroy(orte_process_info.job_session_dir,
|
||||||
|
true, orte_dir_check_file);
|
||||||
|
|
||||||
/* now attempt to eliminate the top level directory itself - this
|
/* now attempt to eliminate the top level directory itself - this
|
||||||
* will fail if anything is present, but ensures we cleanup if
|
* will fail if anything is present, but ensures we cleanup if
|
||||||
* we are the last one out
|
* we are the last one out
|
||||||
*/
|
*/
|
||||||
opal_os_dirpath_destroy(tmp, false, orte_dir_check_file);
|
if( NULL != orte_process_info.top_session_dir ){
|
||||||
|
opal_os_dirpath_destroy(orte_process_info.top_session_dir,
|
||||||
|
false, orte_dir_check_file);
|
||||||
|
}
|
||||||
|
|
||||||
if (NULL != job_session_dir && opal_os_dirpath_is_empty(job_session_dir)) {
|
if (opal_os_dirpath_is_empty(orte_process_info.job_session_dir)) {
|
||||||
if (orte_debug_flag) {
|
if (orte_debug_flag) {
|
||||||
opal_output(0, "sess_dir_cleanup: found job session dir empty - deleting");
|
opal_output(0, "sess_dir_cleanup: found job session dir empty - deleting");
|
||||||
}
|
}
|
||||||
rmdir(job_session_dir);
|
rmdir(orte_process_info.job_session_dir);
|
||||||
} else {
|
} else {
|
||||||
if (orte_debug_flag) {
|
if (orte_debug_flag) {
|
||||||
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(job_session_dir, 0)) {
|
if (OPAL_ERR_NOT_FOUND ==
|
||||||
|
opal_os_dirpath_access(orte_process_info.job_session_dir, 0)) {
|
||||||
opal_output(0, "sess_dir_cleanup: job session dir does not exist");
|
opal_output(0, "sess_dir_cleanup: job session dir does not exist");
|
||||||
} else {
|
} else {
|
||||||
opal_output(0, "sess_dir_cleanup: job session dir not empty - leaving");
|
opal_output(0, "sess_dir_cleanup: job session dir not empty - leaving");
|
||||||
@ -525,24 +415,27 @@ orte_session_dir_cleanup(orte_jobid_t jobid)
|
|||||||
goto CLEANUP;
|
goto CLEANUP;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opal_os_dirpath_is_empty(tmp)) {
|
if ( NULL != orte_process_info.top_session_dir ){
|
||||||
|
|
||||||
|
if( opal_os_dirpath_is_empty(orte_process_info.top_session_dir) ) {
|
||||||
if (orte_debug_flag) {
|
if (orte_debug_flag) {
|
||||||
opal_output(0, "sess_dir_cleanup: found top session dir empty - deleting");
|
opal_output(0, "sess_dir_cleanup: found top session dir empty - deleting");
|
||||||
}
|
}
|
||||||
rmdir(tmp);
|
rmdir(orte_process_info.top_session_dir);
|
||||||
} else {
|
} else {
|
||||||
if (orte_debug_flag) {
|
if (orte_debug_flag) {
|
||||||
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(tmp, 0)) {
|
if (OPAL_ERR_NOT_FOUND ==
|
||||||
|
opal_os_dirpath_access(orte_process_info.top_session_dir, 0)) {
|
||||||
opal_output(0, "sess_dir_cleanup: top session dir does not exist");
|
opal_output(0, "sess_dir_cleanup: top session dir does not exist");
|
||||||
} else {
|
} else {
|
||||||
opal_output(0, "sess_dir_cleanup: top session dir not empty - leaving");
|
opal_output(0, "sess_dir_cleanup: top session dir not empty - leaving");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
CLEANUP:
|
CLEANUP:
|
||||||
if (NULL != tmp) free(tmp);
|
|
||||||
if (NULL != job_session_dir) free(job_session_dir);
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -555,62 +448,40 @@ orte_session_dir_finalize(orte_process_name_t *proc)
|
|||||||
char *job_session_dir, *vpid, *proc_session_dir;
|
char *job_session_dir, *vpid, *proc_session_dir;
|
||||||
|
|
||||||
if (!orte_create_session_dirs ) {
|
if (!orte_create_session_dirs ) {
|
||||||
/* didn't create them */
|
/* we haven't created them */
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (NULL == orte_process_info.tmpdir_base &&
|
if (NULL == orte_process_info.job_session_dir ||
|
||||||
NULL == orte_process_info.top_session_dir) {
|
NULL == orte_process_info.proc_session_dir) {
|
||||||
/* this should never happen - it means we are calling
|
/* this should never happen - it means we are calling
|
||||||
* cleanup *before* properly setting up the session
|
* cleanup *before* properly setting up the session
|
||||||
* dir system. Protect against the possibility of
|
* dir system. This leaves open the possibility of
|
||||||
* accidentally removing directories we shouldn't
|
* accidentally removing directories we shouldn't
|
||||||
* touch by returning
|
* touch
|
||||||
*/
|
*/
|
||||||
return ORTE_ERR_NOT_INITIALIZED;
|
rc = ORTE_ERR_NOT_INITIALIZED;
|
||||||
|
goto CLEANUP;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* need to setup the top_session_dir with the prefix */
|
opal_os_dirpath_destroy(orte_process_info.proc_session_dir,
|
||||||
tmp = opal_os_path(false,
|
|
||||||
orte_process_info.tmpdir_base,
|
|
||||||
orte_process_info.top_session_dir, NULL);
|
|
||||||
|
|
||||||
/* define the proc and job session directories for this process */
|
|
||||||
if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(&vpid, proc->vpid))) {
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
free(tmp);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
job_session_dir = orte_build_job_session_dir(tmp, proc, proc->jobid);
|
|
||||||
if( NULL == job_session_dir) {
|
|
||||||
free(tmp);
|
|
||||||
free(vpid);
|
|
||||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
}
|
|
||||||
proc_session_dir = opal_os_path( false, job_session_dir, vpid, NULL );
|
|
||||||
if( NULL == proc_session_dir ) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
||||||
free(tmp);
|
|
||||||
free(vpid);
|
|
||||||
free(job_session_dir);
|
|
||||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
}
|
|
||||||
|
|
||||||
opal_os_dirpath_destroy(proc_session_dir,
|
|
||||||
false, orte_dir_check_file);
|
false, orte_dir_check_file);
|
||||||
opal_os_dirpath_destroy(job_session_dir,
|
opal_os_dirpath_destroy(orte_process_info.job_session_dir,
|
||||||
false, orte_dir_check_file);
|
false, orte_dir_check_file);
|
||||||
opal_os_dirpath_destroy(tmp,
|
if( NULL != orte_process_info.top_session_dir ){
|
||||||
|
opal_os_dirpath_destroy(orte_process_info.top_session_dir,
|
||||||
false, orte_dir_check_file);
|
false, orte_dir_check_file);
|
||||||
|
}
|
||||||
|
|
||||||
if (opal_os_dirpath_is_empty(proc_session_dir)) {
|
if (opal_os_dirpath_is_empty(orte_process_info.proc_session_dir)) {
|
||||||
if (orte_debug_flag) {
|
if (orte_debug_flag) {
|
||||||
opal_output(0, "sess_dir_finalize: found proc session dir empty - deleting");
|
opal_output(0, "sess_dir_finalize: found proc session dir empty - deleting");
|
||||||
}
|
}
|
||||||
rmdir(proc_session_dir);
|
rmdir(orte_process_info.proc_session_dir);
|
||||||
} else {
|
} else {
|
||||||
if (orte_debug_flag) {
|
if (orte_debug_flag) {
|
||||||
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(proc_session_dir, 0)) {
|
if (OPAL_ERR_NOT_FOUND ==
|
||||||
|
opal_os_dirpath_access(orte_process_info.proc_session_dir, 0)) {
|
||||||
opal_output(0, "sess_dir_finalize: proc session dir does not exist");
|
opal_output(0, "sess_dir_finalize: proc session dir does not exist");
|
||||||
} else {
|
} else {
|
||||||
opal_output(0, "sess_dir_finalize: proc session dir not empty - leaving");
|
opal_output(0, "sess_dir_finalize: proc session dir not empty - leaving");
|
||||||
@ -619,14 +490,15 @@ orte_session_dir_finalize(orte_process_name_t *proc)
|
|||||||
goto CLEANUP;
|
goto CLEANUP;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opal_os_dirpath_is_empty(job_session_dir)) {
|
if (opal_os_dirpath_is_empty(orte_process_info.job_session_dir)) {
|
||||||
if (orte_debug_flag) {
|
if (orte_debug_flag) {
|
||||||
opal_output(0, "sess_dir_finalize: found job session dir empty - deleting");
|
opal_output(0, "sess_dir_finalize: found job session dir empty - deleting");
|
||||||
}
|
}
|
||||||
rmdir(job_session_dir);
|
rmdir(orte_process_info.job_session_dir);
|
||||||
} else {
|
} else {
|
||||||
if (orte_debug_flag) {
|
if (orte_debug_flag) {
|
||||||
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(job_session_dir, 0)) {
|
if (OPAL_ERR_NOT_FOUND ==
|
||||||
|
opal_os_dirpath_access(orte_process_info.job_session_dir, 0)) {
|
||||||
opal_output(0, "sess_dir_finalize: job session dir does not exist");
|
opal_output(0, "sess_dir_finalize: job session dir does not exist");
|
||||||
} else {
|
} else {
|
||||||
opal_output(0, "sess_dir_finalize: job session dir not empty - leaving");
|
opal_output(0, "sess_dir_finalize: job session dir not empty - leaving");
|
||||||
@ -635,26 +507,25 @@ orte_session_dir_finalize(orte_process_name_t *proc)
|
|||||||
goto CLEANUP;
|
goto CLEANUP;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opal_os_dirpath_is_empty(tmp)) {
|
if(NULL != orte_process_info.top_session_dir) {
|
||||||
|
if (opal_os_dirpath_is_empty(orte_process_info.top_session_dir)) {
|
||||||
if (orte_debug_flag) {
|
if (orte_debug_flag) {
|
||||||
opal_output(0, "sess_dir_finalize: found top session dir empty - deleting");
|
opal_output(0, "sess_dir_finalize: found top session dir empty - deleting");
|
||||||
}
|
}
|
||||||
rmdir(tmp);
|
rmdir(tmp);
|
||||||
} else {
|
} else {
|
||||||
if (orte_debug_flag) {
|
if (orte_debug_flag) {
|
||||||
if (OPAL_ERR_NOT_FOUND == opal_os_dirpath_access(tmp, 0)) {
|
if (OPAL_ERR_NOT_FOUND ==
|
||||||
|
opal_os_dirpath_access(orte_process_info.top_session_dir, 0)) {
|
||||||
opal_output(0, "sess_dir_finalize: top session dir does not exist");
|
opal_output(0, "sess_dir_finalize: top session dir does not exist");
|
||||||
} else {
|
} else {
|
||||||
opal_output(0, "sess_dir_finalize: top session dir not empty - leaving");
|
opal_output(0, "sess_dir_finalize: top session dir not empty - leaving");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
CLEANUP:
|
CLEANUP:
|
||||||
free(tmp);
|
|
||||||
free(vpid);
|
|
||||||
free(job_session_dir);
|
|
||||||
free(proc_session_dir);
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -680,33 +551,3 @@ orte_dir_check_file(const char *root, const char *path)
|
|||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *orte_build_job_session_dir(char *top_dir,
|
|
||||||
orte_process_name_t *proc,
|
|
||||||
orte_jobid_t jobid)
|
|
||||||
{
|
|
||||||
char *job_session_dir;
|
|
||||||
|
|
||||||
if (ORTE_JOBID_WILDCARD != jobid) {
|
|
||||||
char *job = NULL;
|
|
||||||
|
|
||||||
if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(jobid))) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
||||||
job_session_dir = NULL;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
job_session_dir = opal_os_path(false, top_dir, orte_process_info.jobfam_session_dir, job, NULL);
|
|
||||||
free(job);
|
|
||||||
if (NULL == job_session_dir) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
job_session_dir = opal_os_path(false, top_dir, orte_process_info.jobfam_session_dir, NULL);
|
|
||||||
if( NULL == job_session_dir) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
out:
|
|
||||||
return job_session_dir;
|
|
||||||
}
|
|
||||||
|
@ -99,19 +99,6 @@ BEGIN_C_DECLS
|
|||||||
* locate an already existing universe for reconnection
|
* locate an already existing universe for reconnection
|
||||||
* purposes. If set to "true", then the function
|
* purposes. If set to "true", then the function
|
||||||
* creates the directory, if possible.
|
* creates the directory, if possible.
|
||||||
* @param prefix A string variable indicating where the user
|
|
||||||
* stipulated the directory should be found or
|
|
||||||
* placed. A value of "NULL" indicates that the user
|
|
||||||
* specified no location - hence, the function explores
|
|
||||||
* a range of "standard" locations.
|
|
||||||
* @param hostid Name of the host on which the session directory is
|
|
||||||
* being built. Used to build the name of the
|
|
||||||
* "openmpi-sessions-[user]@[host]:[batch]" branch of
|
|
||||||
* the directory tree. NULL indicates that the nodename
|
|
||||||
* found in orte_process_info is to be used.
|
|
||||||
* @param batchid Batch job name, used in batch scheduling
|
|
||||||
* systems. NULL indicates that the default of "0" is
|
|
||||||
* to be used.
|
|
||||||
* @param proc Pointer to a process name for which the session
|
* @param proc Pointer to a process name for which the session
|
||||||
* dir name is desired
|
* dir name is desired
|
||||||
*
|
*
|
||||||
@ -120,18 +107,13 @@ BEGIN_C_DECLS
|
|||||||
* @retval OMPI_ERROR The directory cannot be found (if create is
|
* @retval OMPI_ERROR The directory cannot be found (if create is
|
||||||
* "false") or created (if create is "true").
|
* "false") or created (if create is "true").
|
||||||
*/
|
*/
|
||||||
ORTE_DECLSPEC int orte_session_dir(bool create, char *prefix, char *hostid,
|
ORTE_DECLSPEC int orte_session_dir(bool create, orte_process_name_t *proc);
|
||||||
orte_process_name_t *proc);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Construct the session directory name from the input parameters.
|
* Setup session-related directory paths
|
||||||
* This function does no checking that the directory exists, or can be used
|
|
||||||
*/
|
*/
|
||||||
ORTE_DECLSPEC int orte_session_dir_get_name(char **fulldirpath,
|
ORTE_DECLSPEC int orte_session_setup_base(orte_process_name_t *proc);
|
||||||
char **prfx,
|
|
||||||
char **frontend,
|
|
||||||
char *hostid,
|
|
||||||
orte_process_name_t *proc);
|
|
||||||
|
|
||||||
/** The orte_session_dir_finalize() function performs a cleanup of the
|
/** The orte_session_dir_finalize() function performs a cleanup of the
|
||||||
* session directory tree. It first removes the session directory for
|
* session directory tree. It first removes the session directory for
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user