1
1

Add a PMIx key that gives the RM the ability to indicate that it will clean up

session directories provided through OPAL_PMIX_TMPDIR,
OPAL_PMIX_NSDIR, and OPAL_PMIX_PROCDIR.
Этот коммит содержится в:
Artem Polyakov 2016-09-02 00:23:30 +07:00
родитель 81195ab724
Коммит dc0ab674de
4 изменённых файлов: 40 добавлений и 23 удалений

Просмотреть файл

@ -75,6 +75,7 @@ BEGIN_C_DECLS
#define OPAL_PMIX_TMPDIR "pmix.tmpdir" // (char*) top-level tmp dir assigned to session #define OPAL_PMIX_TMPDIR "pmix.tmpdir" // (char*) top-level tmp dir assigned to session
#define OPAL_PMIX_NSDIR "pmix.nsdir" // (char*) sub-tmpdir assigned to namespace #define OPAL_PMIX_NSDIR "pmix.nsdir" // (char*) sub-tmpdir assigned to namespace
#define OPAL_PMIX_PROCDIR "pmix.pdir" // (char*) sub-nsdir assigned to proc #define OPAL_PMIX_PROCDIR "pmix.pdir" // (char*) sub-nsdir assigned to proc
#define OPAL_PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean" // (bool) Resource Manager will clean session directories
/* information about relative ranks as assigned by the RM */ /* information about relative ranks as assigned by the RM */
#define OPAL_PMIX_JOBID "pmix.jobid" // (uint32_t) jobid assigned by scheduler #define OPAL_PMIX_JOBID "pmix.jobid" // (uint32_t) jobid assigned by scheduler

Просмотреть файл

@ -94,6 +94,7 @@ static int rte_init(void)
uint16_t u16, *u16ptr; uint16_t u16, *u16ptr;
char **peers=NULL, *mycpuset, **cpusets=NULL; char **peers=NULL, *mycpuset, **cpusets=NULL;
opal_process_name_t wildcard_rank, pname; opal_process_name_t wildcard_rank, pname;
bool bool_val, tdir_mca_override = false;
size_t i; size_t i;
/* run the prolog */ /* run the prolog */
@ -246,37 +247,51 @@ static int rte_init(void)
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TMPDIR, &wildcard_rank, &val, OPAL_STRING); OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TMPDIR, &wildcard_rank, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) { if (OPAL_SUCCESS == ret && NULL != val) {
/* TODO: who has precedence - pmix of MCA setting??? */ /* TODO: who has precedence - pmix of MCA setting??? */
if( NULL == orte_process_info.top_session_dir ){ if( NULL != orte_process_info.top_session_dir ){
orte_process_info.top_session_dir = val; orte_process_info.top_session_dir = val;
} else { } else {
/* keep the MCA setting */ /* keep the MCA setting */
tdir_mca_override = true;
free(val); free(val);
} }
val = NULL; val = NULL;
} }
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_NSDIR, &wildcard_rank, &val, OPAL_STRING); if( !tdir_mca_override ){
if (OPAL_SUCCESS == ret && NULL != val) { OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_NSDIR, &wildcard_rank, &val, OPAL_STRING);
/* TODO: who has precedence - pmix of MCA setting??? */ if (OPAL_SUCCESS == ret && NULL != val) {
if( NULL == orte_process_info.job_session_dir ){ /* TODO: who has precedence - pmix of MCA setting??? */
orte_process_info.job_session_dir = val; if( NULL == orte_process_info.job_session_dir ){
} else { orte_process_info.job_session_dir = val;
/* keep the MCA setting */ } else {
free(val); /* keep the MCA setting */
free(val);
tdir_mca_override = true;
}
val = NULL;
} }
val = NULL;
} }
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_PROCDIR, &wildcard_rank, &val, OPAL_STRING); if( !tdir_mca_override ){
if (OPAL_SUCCESS == ret && NULL != val) { OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_PROCDIR, &wildcard_rank, &val, OPAL_STRING);
/* TODO: who has precedence - pmix of MCA setting??? */ if (OPAL_SUCCESS == ret && NULL != val) {
if( NULL == orte_process_info.proc_session_dir ){ /* TODO: who has precedence - pmix of MCA setting??? */
orte_process_info.proc_session_dir = val; if( NULL == orte_process_info.proc_session_dir ){
} else { orte_process_info.proc_session_dir = val;
/* keep the MCA setting */ } else {
free(val); /* keep the MCA setting */
tdir_mca_override = true;
free(val);
}
val = NULL;
}
}
if( !tdir_mca_override ){
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_TDIR_RMCLEAN, &wildcard_rank, &bool_val, OPAL_BOOL);
if (OPAL_SUCCESS == ret ) {
orte_process_info.rm_session_dirs = val;
} }
val = NULL;
} }
/* retrieve our topology */ /* retrieve our topology */

Просмотреть файл

@ -122,6 +122,7 @@ struct orte_proc_info_t {
char *jobfam_session_dir; /**< Session directory for this family of jobs (i.e., share same mpirun) */ char *jobfam_session_dir; /**< Session directory for this family of jobs (i.e., share same mpirun) */
char *job_session_dir; /**< Session directory for job */ char *job_session_dir; /**< Session directory for job */
char *proc_session_dir; /**< Session directory for the process */ char *proc_session_dir; /**< Session directory for the process */
bool rm_session_dirs; /**< Session directories will be cleaned up by RM */
char *sock_stdin; /**< Path name to temp file for stdin. */ char *sock_stdin; /**< Path name to temp file for stdin. */
char *sock_stdout; /**< Path name to temp file for stdout. */ char *sock_stdout; /**< Path name to temp file for stdout. */

Просмотреть файл

@ -366,8 +366,8 @@ orte_session_dir_cleanup(orte_jobid_t jobid)
{ {
int rc = ORTE_SUCCESS; int rc = ORTE_SUCCESS;
if (!orte_create_session_dirs ) { if (!orte_create_session_dirs || orte_process_info.rm_session_dirs ) {
/* we haven't created them */ /* we haven't created them or RM will clean them up for us*/
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -447,8 +447,8 @@ orte_session_dir_finalize(orte_process_name_t *proc)
char *tmp; char *tmp;
char *job_session_dir, *vpid, *proc_session_dir; char *job_session_dir, *vpid, *proc_session_dir;
if (!orte_create_session_dirs ) { if (!orte_create_session_dirs || orte_process_info.rm_session_dirs ) {
/* we haven't created them */ /* we haven't created them or RM will clean them up for us*/
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }