Patch from Nadia/Bull from the opal-sos HG branch:
orte_session_dir_finalize() doesn't clean the right directories, and neither does orte_session_dir_cleanup(). This patch fixes several issues: (1) In orte_session_dir_cleanup(): (a) when jobid is not a wildcard, jobid is used to build the job session dir (instead of ORTE_LOCAL_JOBID); (b) ORTE_SUCCESS is unconditionally returned (instead of rc, which might have been previously set to another value). (2) In orte_session_dir_finalize(): (a) convert_jobid_to_string is not the right call to get the job session dir; (b) in some places orte_process_info.top_session_dir is used directly, without being prefixed with the base directory. The code sections that build the job_session_dir have been factored out into a single orte_build_job_session_dir() function that is now called by both orte_session_dir_finalize() and orte_session_dir_cleanup(). Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net> This commit was SVN r21498.
Этот коммит содержится в:
родитель
938599cb2d
Коммит
ecaa00ba73
@ -70,6 +70,10 @@ static int orte_create_dir(char *directory);
|
|||||||
static bool orte_dir_check_file(const char *root, const char *path);
|
static bool orte_dir_check_file(const char *root, const char *path);
|
||||||
static bool orte_dir_check_file_output(const char *root, const char *path);
|
static bool orte_dir_check_file_output(const char *root, const char *path);
|
||||||
|
|
||||||
|
static char *orte_build_job_session_dir(char *top_dir,
|
||||||
|
orte_process_name_t *proc,
|
||||||
|
orte_jobid_t jobid);
|
||||||
|
|
||||||
#define OMPI_PRINTF_FIX_STRING(a) ((NULL == a) ? "(null)" : a)
|
#define OMPI_PRINTF_FIX_STRING(a) ((NULL == a) ? "(null)" : a)
|
||||||
|
|
||||||
/****************************
|
/****************************
|
||||||
@ -541,9 +545,9 @@ int orte_session_dir(bool create,
|
|||||||
int
|
int
|
||||||
orte_session_dir_cleanup(orte_jobid_t jobid)
|
orte_session_dir_cleanup(orte_jobid_t jobid)
|
||||||
{
|
{
|
||||||
int rc;
|
int rc = ORTE_SUCCESS;
|
||||||
char *tmp;
|
char *tmp;
|
||||||
char *jobfam=NULL, *job=NULL, *job_session_dir=NULL;
|
char *job_session_dir=NULL;
|
||||||
|
|
||||||
/* need to setup the top_session_dir with the prefix */
|
/* need to setup the top_session_dir with the prefix */
|
||||||
tmp = opal_os_path(false,
|
tmp = opal_os_path(false,
|
||||||
@ -551,35 +555,19 @@ orte_session_dir_cleanup(orte_jobid_t jobid)
|
|||||||
orte_process_info.top_session_dir, NULL);
|
orte_process_info.top_session_dir, NULL);
|
||||||
|
|
||||||
/* we can only blow away session directories for our job family */
|
/* we can only blow away session directories for our job family */
|
||||||
if (0 > asprintf(&jobfam, "%d", ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid))) {
|
job_session_dir = orte_build_job_session_dir(tmp, ORTE_PROC_MY_NAME, jobid);
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
if (NULL == job_session_dir) {
|
||||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
goto CLEANUP;
|
goto CLEANUP;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ORTE_JOBID_WILDCARD != jobid) {
|
if (ORTE_JOBID_WILDCARD != jobid) {
|
||||||
|
opal_os_dirpath_destroy(job_session_dir, true, orte_dir_check_file);
|
||||||
if (0 > asprintf(&job, "%d", jobid)) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
||||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
goto CLEANUP;
|
|
||||||
}
|
|
||||||
|
|
||||||
job_session_dir = opal_os_path(false, tmp, jobfam, job, NULL );
|
|
||||||
if( NULL == job_session_dir ) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
||||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
goto CLEANUP;
|
|
||||||
}
|
|
||||||
|
|
||||||
opal_os_dirpath_destroy(job_session_dir,
|
|
||||||
true, orte_dir_check_file);
|
|
||||||
} else {
|
} else {
|
||||||
/* if we want the session_dir removed for ALL jobids, then
|
/* if we want the session_dir removed for ALL jobids, then
|
||||||
* just recursively blow the whole session away for our job family,
|
* just recursively blow the whole session away for our job family,
|
||||||
* saving only output files
|
* saving only output files
|
||||||
*/
|
*/
|
||||||
job_session_dir = opal_os_path(false, tmp, jobfam, NULL);
|
|
||||||
opal_os_dirpath_destroy(job_session_dir, true, orte_dir_check_file_output);
|
opal_os_dirpath_destroy(job_session_dir, true, orte_dir_check_file_output);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -614,10 +602,8 @@ orte_session_dir_cleanup(orte_jobid_t jobid)
|
|||||||
|
|
||||||
CLEANUP:
|
CLEANUP:
|
||||||
free(tmp);
|
free(tmp);
|
||||||
if (NULL != job) free(job);
|
|
||||||
if (NULL != jobfam) free(jobfam);
|
|
||||||
if (NULL != job_session_dir) free(job_session_dir);
|
if (NULL != job_session_dir) free(job_session_dir);
|
||||||
return ORTE_SUCCESS;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -626,7 +612,7 @@ orte_session_dir_finalize(orte_process_name_t *proc)
|
|||||||
{
|
{
|
||||||
int rc;
|
int rc;
|
||||||
char *tmp;
|
char *tmp;
|
||||||
char *job, *job_session_dir, *vpid, *proc_session_dir;
|
char *job_session_dir, *vpid, *proc_session_dir;
|
||||||
|
|
||||||
/* need to setup the top_session_dir with the prefix */
|
/* need to setup the top_session_dir with the prefix */
|
||||||
tmp = opal_os_path(false,
|
tmp = opal_os_path(false,
|
||||||
@ -634,23 +620,14 @@ orte_session_dir_finalize(orte_process_name_t *proc)
|
|||||||
orte_process_info.top_session_dir, NULL);
|
orte_process_info.top_session_dir, NULL);
|
||||||
|
|
||||||
/* define the proc and job session directories for this process */
|
/* define the proc and job session directories for this process */
|
||||||
if (ORTE_SUCCESS != (rc = orte_util_convert_jobid_to_string(&job, proc->jobid))) {
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
free(tmp);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(&vpid, proc->vpid))) {
|
if (ORTE_SUCCESS != (rc = orte_util_convert_vpid_to_string(&vpid, proc->vpid))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
free(tmp);
|
free(tmp);
|
||||||
free(job);
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
job_session_dir = opal_os_path( false, orte_process_info.top_session_dir,
|
job_session_dir = orte_build_job_session_dir(tmp, proc, proc->jobid);
|
||||||
job, NULL );
|
if( NULL == job_session_dir) {
|
||||||
if( NULL == job_session_dir ) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
|
||||||
free(tmp);
|
free(tmp);
|
||||||
free(job);
|
|
||||||
free(vpid);
|
free(vpid);
|
||||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
}
|
}
|
||||||
@ -658,7 +635,6 @@ orte_session_dir_finalize(orte_process_name_t *proc)
|
|||||||
if( NULL == proc_session_dir ) {
|
if( NULL == proc_session_dir ) {
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||||
free(tmp);
|
free(tmp);
|
||||||
free(job);
|
|
||||||
free(vpid);
|
free(vpid);
|
||||||
free(job_session_dir);
|
free(job_session_dir);
|
||||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
@ -668,8 +644,6 @@ orte_session_dir_finalize(orte_process_name_t *proc)
|
|||||||
false, orte_dir_check_file);
|
false, orte_dir_check_file);
|
||||||
opal_os_dirpath_destroy(job_session_dir,
|
opal_os_dirpath_destroy(job_session_dir,
|
||||||
false, orte_dir_check_file);
|
false, orte_dir_check_file);
|
||||||
opal_os_dirpath_destroy(orte_process_info.top_session_dir,
|
|
||||||
false, orte_dir_check_file);
|
|
||||||
opal_os_dirpath_destroy(tmp,
|
opal_os_dirpath_destroy(tmp,
|
||||||
false, orte_dir_check_file);
|
false, orte_dir_check_file);
|
||||||
|
|
||||||
@ -697,18 +671,6 @@ orte_session_dir_finalize(orte_process_name_t *proc)
|
|||||||
goto CLEANUP;
|
goto CLEANUP;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opal_os_dirpath_is_empty(orte_process_info.top_session_dir)) {
|
|
||||||
if (orte_debug_flag) {
|
|
||||||
opal_output(0, "sess_dir_finalize: found top session dir empty - deleting");
|
|
||||||
}
|
|
||||||
rmdir(orte_process_info.top_session_dir);
|
|
||||||
} else {
|
|
||||||
if (orte_debug_flag) {
|
|
||||||
opal_output(0, "sess_dir_finalize: top session dir not empty - leaving");
|
|
||||||
}
|
|
||||||
goto CLEANUP;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (opal_os_dirpath_is_empty(tmp)) {
|
if (opal_os_dirpath_is_empty(tmp)) {
|
||||||
if (orte_debug_flag) {
|
if (orte_debug_flag) {
|
||||||
opal_output(0, "sess_dir_finalize: found top session dir empty - deleting");
|
opal_output(0, "sess_dir_finalize: found top session dir empty - deleting");
|
||||||
@ -722,7 +684,6 @@ orte_session_dir_finalize(orte_process_name_t *proc)
|
|||||||
|
|
||||||
CLEANUP:
|
CLEANUP:
|
||||||
free(tmp);
|
free(tmp);
|
||||||
free(job);
|
|
||||||
free(vpid);
|
free(vpid);
|
||||||
free(job_session_dir);
|
free(job_session_dir);
|
||||||
free(proc_session_dir);
|
free(proc_session_dir);
|
||||||
@ -759,3 +720,39 @@ orte_dir_check_file_output(const char *root, const char *path)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Build the path of a job session directory.
 *
 * The path is assembled with opal_os_path() from top_dir, the job family
 * of proc->jobid (formatted with "%d"), and -- unless jobid is
 * ORTE_JOBID_WILDCARD -- the local jobid extracted from jobid with
 * ORTE_LOCAL_JOBID(). With a wildcard jobid only top_dir and the job
 * family are used.
 *
 * Note that the job family comes from proc->jobid, not from the jobid
 * argument.
 *
 * Returns the string produced by opal_os_path(), or NULL after logging
 * ORTE_ERR_OUT_OF_RESOURCE when asprintf() fails or opal_os_path()
 * returns NULL. The temporary jobfam/job strings are freed here; the
 * callers visible in this file free() the returned pointer themselves.
 */
static char *orte_build_job_session_dir(char *top_dir,
|
||||||
|
orte_process_name_t *proc,
|
||||||
|
orte_jobid_t jobid)
|
||||||
|
{
|
||||||
|
char *jobfam = NULL;
|
||||||
|
char *job_session_dir;
|
||||||
|
|
||||||
|
if (0 > asprintf(&jobfam, "%d", ORTE_JOB_FAMILY(proc->jobid))) {
|
||||||
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ORTE_JOBID_WILDCARD != jobid) {
|
||||||
|
char *job = NULL;
|
||||||
|
|
||||||
|
if (0 > asprintf(&job, "%d", ORTE_LOCAL_JOBID(jobid))) {
|
||||||
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||||
|
job_session_dir = NULL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
job_session_dir = opal_os_path(false, top_dir, jobfam, job, NULL);
|
||||||
|
free(job);
|
||||||
|
if (NULL == job_session_dir) {
|
||||||
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
job_session_dir = opal_os_path(false, top_dir, jobfam, NULL);
|
||||||
|
if( NULL == job_session_dir) {
|
||||||
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
free(jobfam);
|
||||||
|
return job_session_dir;
|
||||||
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user