Update PMIx and PRRTE
Deprecate --am and --amca options

Avoid default param files on backend nodes
Any parameters in the PRRTE default or user param files will have been picked up by prte and included in the environment sent to the prted, so don't open those files on the backend.

Avoid picking up MCA param file info on backend
Avoid the scaling problem at PRRTE startup by only reading the system and user param files on the frontend.

Complete revisions to cmd line parser for OMPI
Per specification, enforce the following precedence order:

1. system-level default parameter file
2. user-level default parameter file
3. Anything found in the environment
4. "--tune" files. Note that "--amca" goes away and becomes equivalent to "--tune". It is okay if it is provided more than once on a cmd line (we will aggregate the list of files, retaining order), but it is an error if a parameter is referenced in more than one file with a different value
5. "--mca" options. Again, it is an error if the same option appears more than once with a different value. Allowed to override a parameter referenced in a "tune" file
6. "-x" options. Allowed to overwrite options given in a "tune" file, but cannot conflict with an explicit "--mca" option
7. all other options

Fix special handling of "-np"

Get agreement on jobid across the layers
Need all three pieces (PRRTE, PMIx, and OPAL) to agree on the nspace conversion to jobid method

Ensure prte show_help messages get output

Print abnormal termination messages

Clean up error reporting in persistent operations

Signed-off-by: Ralph Castain <rhc@pmix.org>

dd

Signed-off-by: Ralph Castain <rhc@pmix.org>
Этот коммит содержится в:
родитель
f9575ed026
Коммит
1cf972dcaf
@ -110,18 +110,14 @@ int opal_pmix_convert_jobid(pmix_nspace_t nspace, opal_jobid_t jobid)
|
||||
/* zero out the nspace */
|
||||
PMIX_LOAD_NSPACE(nspace, NULL);
|
||||
|
||||
if (opal_process_info.nativelaunch) {
|
||||
opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid);
|
||||
return OPAL_SUCCESS;
|
||||
} else {
|
||||
/* cycle across our list of known jobids */
|
||||
OPAL_LIST_FOREACH(nptr, &localnspaces, opal_nptr_t) {
|
||||
if (jobid == nptr->jobid) {
|
||||
PMIX_LOAD_NSPACE(nspace, nptr->nspace);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
/* cycle across our list of known jobids */
|
||||
OPAL_LIST_FOREACH(nptr, &localnspaces, opal_nptr_t) {
|
||||
if (jobid == nptr->jobid) {
|
||||
PMIX_LOAD_NSPACE(nspace, nptr->nspace);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
@ -129,29 +125,55 @@ int opal_pmix_convert_nspace(opal_jobid_t *jobid, pmix_nspace_t nspace)
|
||||
{
|
||||
opal_nptr_t *nptr;
|
||||
opal_jobid_t jid;
|
||||
uint16_t jobfam;
|
||||
uint32_t hash32, localjob = 0;
|
||||
char *p = NULL;
|
||||
|
||||
/* set a default */
|
||||
*jobid = OPAL_JOBID_INVALID;
|
||||
|
||||
if (opal_process_info.nativelaunch) {
|
||||
return opal_convert_string_to_jobid(jobid, nspace);
|
||||
} else {
|
||||
/* cycle across our list of known jobids */
|
||||
OPAL_LIST_FOREACH(nptr, &localnspaces, opal_nptr_t) {
|
||||
if (PMIX_CHECK_NSPACE(nspace, nptr->nspace)) {
|
||||
*jobid = nptr->jobid;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
}
|
||||
/* if we get here, we don't know this nspace */
|
||||
OPAL_HASH_STR(nspace, jid);
|
||||
jid &= ~(0x8000);
|
||||
*jobid = jid;
|
||||
nptr = OBJ_NEW(opal_nptr_t);
|
||||
nptr->jobid = jid;
|
||||
PMIX_LOAD_NSPACE(nptr->nspace, nspace);
|
||||
opal_list_append(&localnspaces, &nptr->super);
|
||||
/* if the nspace is empty, there is nothing more to do */
|
||||
if (0 == strlen(nspace)) {
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
if (NULL != strstr(nspace, "JOBID_WILDCARD")) {
|
||||
*jobid = OPAL_JOBID_WILDCARD;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
if (NULL != strstr(nspace, "JOBID_INVALID")) {
|
||||
*jobid = OPAL_JOBID_INVALID;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
/* cycle across our list of known jobids */
|
||||
OPAL_LIST_FOREACH(nptr, &localnspaces, opal_nptr_t) {
|
||||
if (PMIX_CHECK_NSPACE(nspace, nptr->nspace)) {
|
||||
*jobid = nptr->jobid;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
}
|
||||
/* if we get here, we don't know this nspace */
|
||||
/* find the "." at the end that indicates the child job */
|
||||
if (NULL != (p = strrchr(nspace, '.'))) {
|
||||
*p = '\0';
|
||||
}
|
||||
OPAL_HASH_STR(nspace, hash32);
|
||||
if (NULL != p) {
|
||||
*p = '.';
|
||||
++p;
|
||||
localjob = strtoul(p, NULL, 10);
|
||||
}
|
||||
|
||||
/* now compress to 16-bits */
|
||||
jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32));
|
||||
jid = (0xffff0000 & ((uint32_t)jobfam << 16)) | (0x0000ffff & localjob);
|
||||
*jobid = jid;
|
||||
/* save this jobid/nspace pair */
|
||||
nptr = OBJ_NEW(opal_nptr_t);
|
||||
nptr->jobid = jid;
|
||||
PMIX_LOAD_NSPACE(nptr->nspace, nspace);
|
||||
opal_list_append(&localnspaces, &nptr->super);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -595,9 +595,11 @@ OPAL_DECLSPEC int opal_pmix_convert_nspace(opal_jobid_t *jobid, pmix_nspace_t ns
|
||||
OPAL_DECLSPEC void opal_pmix_setup_nspace_tracker(void);
|
||||
OPAL_DECLSPEC void opal_pmix_finalize_nspace_tracker(void);
|
||||
|
||||
/* Convert an OPAL jobid to a PMIx nspace.
 *   n - nspace that receives the converted value
 *   j - jobid to convert
 * Expands to a call to opal_pmix_convert_jobid((n), (j)), so the
 * expression yields that function's int return code. */
|
||||
#define OPAL_PMIX_CONVERT_JOBID(n, j) \
|
||||
opal_pmix_convert_jobid((n), (j))
|
||||
|
||||
/* convert vpid to rank */
|
||||
#define OPAL_PMIX_CONVERT_VPID(r, v) \
|
||||
do { \
|
||||
if (OPAL_VPID_WILDCARD == (v)) { \
|
||||
@ -607,6 +609,7 @@ OPAL_DECLSPEC void opal_pmix_finalize_nspace_tracker(void);
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
/* convert opal_process_name_t to pmix_proc_t */
|
||||
#define OPAL_PMIX_CONVERT_NAME(p, n) \
|
||||
do { \
|
||||
OPAL_PMIX_CONVERT_JOBID((p)->nspace, (n)->jobid); \
|
||||
@ -614,9 +617,11 @@ OPAL_DECLSPEC void opal_pmix_finalize_nspace_tracker(void);
|
||||
} while(0)
|
||||
|
||||
|
||||
/* Convert a PMIx nspace to an OPAL jobid.
 *   r - lvalue assigned the return code of opal_pmix_convert_nspace()
 *   j - pointer to the jobid that receives the converted value
 *   n - nspace to convert
 * Note: this expands to a plain assignment expression, not a
 * do { } while(0) block. */
|
||||
#define OPAL_PMIX_CONVERT_NSPACE(r, j, n) \
|
||||
(r) = opal_pmix_convert_nspace((j), (n))
|
||||
|
||||
/* convert pmix rank to opal vpid */
|
||||
#define OPAL_PMIX_CONVERT_RANK(v, r) \
|
||||
do { \
|
||||
if (PMIX_RANK_WILDCARD == (r)) { \
|
||||
@ -628,6 +633,7 @@ OPAL_DECLSPEC void opal_pmix_finalize_nspace_tracker(void);
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
/* convert pmix_proc_t to opal_process_name_t */
|
||||
#define OPAL_PMIX_CONVERT_PROCT(r, n, p) \
|
||||
do { \
|
||||
OPAL_PMIX_CONVERT_NSPACE((r), &(n)->jobid, (p)->nspace); \
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit a18e53138298d61a01fec4471518140304539e8c
|
||||
Subproject commit 4c62a26b319ba78feadc42679200e93041f611a2
|
2
prrte
2
prrte
@ -1 +1 @@
|
||||
Subproject commit cdea5231171b2fdea11269033de9e265fc7f3a63
|
||||
Subproject commit 8d673047b325a148f55c65e049aab67f1de1d318
|
Загрузка…
Ссылка в новой задаче
Block a user