Merge pull request #7441 from rhc54/topic/hack
Create a hack to protect against non-integer jobids
Этот коммит содержится в:
Коммит
f9643b84b9
@ -65,6 +65,8 @@ pmix_process_info_t pmix_process_info = {0};
|
||||
bool pmix_proc_is_bound = false;
|
||||
bool ompi_singleton = false;
|
||||
|
||||
static pmix_proc_t myprocid;
|
||||
|
||||
static bool added_transport_keys = false;
|
||||
static bool added_num_procs = false;
|
||||
static bool added_app_ctx = false;
|
||||
@ -498,7 +500,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
int ret;
|
||||
char *error = NULL;
|
||||
opal_process_name_t pname;
|
||||
pmix_proc_t myproc, rproc;
|
||||
pmix_proc_t rproc;
|
||||
int u32, *u32ptr;
|
||||
uint16_t u16, *u16ptr;
|
||||
char **peers=NULL;
|
||||
@ -530,8 +532,11 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* setup our internal nspace hack */
|
||||
opal_pmix_setup_nspace_tracker();
|
||||
|
||||
/* initialize the selected module */
|
||||
if (!PMIx_Initialized() && (PMIX_SUCCESS != (ret = PMIx_Init(&myproc, NULL, 0)))) {
|
||||
if (!PMIx_Initialized() && (PMIX_SUCCESS != (ret = PMIx_Init(&myprocid, NULL, 0)))) {
|
||||
/* we cannot run - this could be due to being direct launched
|
||||
* without the required PMI support being built, so print
|
||||
* out a help message indicating it */
|
||||
@ -539,8 +544,8 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
return OPAL_ERR_SILENT;
|
||||
}
|
||||
|
||||
/* setup the process name fields */
|
||||
OPAL_PMIX_CONVERT_PROCT(rc, &pname, &myproc);
|
||||
/* setup the process name fields - also registers the new nspace */
|
||||
OPAL_PMIX_CONVERT_PROCT(rc, &pname, &myprocid);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
@ -548,6 +553,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
|
||||
OPAL_PROC_MY_NAME.vpid = pname.vpid;
|
||||
pmix_process_info.my_name.jobid = OPAL_PROC_MY_NAME.jobid;
|
||||
pmix_process_info.my_name.vpid = OPAL_PROC_MY_NAME.vpid;
|
||||
|
||||
/* set our hostname */
|
||||
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, PMIX_HOSTNAME, &OPAL_PROC_MY_NAME,
|
||||
(char**)&ev1, PMIX_STRING);
|
||||
@ -828,6 +834,10 @@ int ompi_rte_finalize(void)
|
||||
free (pmix_process_info.cpuset);
|
||||
pmix_process_info.cpuset = NULL;
|
||||
|
||||
/* cleanup our internal nspace hack */
|
||||
opal_pmix_finalize_nspace_tracker();
|
||||
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -77,6 +77,83 @@ int opal_pmix_base_exchange(pmix_info_t *indat,
|
||||
return opal_pmix_convert_status(rc);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
pmix_nspace_t nspace;
|
||||
opal_jobid_t jobid;
|
||||
} opal_nptr_t;
|
||||
static OBJ_CLASS_INSTANCE(opal_nptr_t,
|
||||
opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
static opal_list_t localnspaces;
|
||||
|
||||
void opal_pmix_setup_nspace_tracker(void)
|
||||
{
|
||||
/* check if we were launched by PRRTE */
|
||||
if (NULL != getenv("PRRTE_LAUNCHED")) {
|
||||
opal_process_info.nativelaunch = true;
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&localnspaces, opal_list_t);
|
||||
}
|
||||
|
||||
void opal_pmix_finalize_nspace_tracker(void)
|
||||
{
|
||||
OPAL_LIST_DESTRUCT(&localnspaces);
|
||||
}
|
||||
|
||||
/*
 * Produce the PMIx namespace that corresponds to an OPAL jobid.
 *
 * @param nspace  [out] namespace buffer; always cleared first, then
 *                filled in on success
 * @param jobid   [in]  jobid to translate
 *
 * @return OPAL_SUCCESS when the namespace was filled in;
 *         OPAL_ERR_NOT_FOUND when the process was not natively
 *         launched and the jobid is not in the list of recorded
 *         pairs (nspace is left cleared in that case).
 */
int opal_pmix_convert_jobid(pmix_nspace_t nspace, opal_jobid_t jobid)
{
    opal_nptr_t *entry;

    /* start from a cleared nspace so a failed lookup never leaves stale text */
    PMIX_LOAD_NSPACE(nspace, NULL);

    /* native launch: the namespace is simply the printed form of the jobid */
    if (opal_process_info.nativelaunch) {
        opal_snprintf_jobid(nspace, PMIX_MAX_NSLEN, jobid);
        return OPAL_SUCCESS;
    }

    /* otherwise search the pairs recorded so far */
    OPAL_LIST_FOREACH(entry, &localnspaces, opal_nptr_t) {
        if (entry->jobid != jobid) {
            continue;
        }
        PMIX_LOAD_NSPACE(nspace, entry->nspace);
        return OPAL_SUCCESS;
    }

    return OPAL_ERR_NOT_FOUND;
}
|
||||
|
||||
/*
 * Produce the OPAL jobid that corresponds to a PMIx namespace.
 *
 * When the process was natively launched the namespace is parsed
 * directly with opal_convert_string_to_jobid(). Otherwise the list of
 * recorded pairs is searched; an unknown namespace is assigned a jobid
 * derived from OPAL_HASH_STR() of the namespace, and the new pair is
 * recorded so later lookups (in either direction) find it.
 *
 * NOTE(review): nothing here detects two different namespaces hashing
 * to the same value; both would be given the same jobid.
 *
 * @param jobid   [out] receives the jobid; set to OPAL_JOBID_INVALID
 *                on entry and left that way if no jobid is produced
 * @param nspace  [in]  namespace to translate
 *
 * @return OPAL_SUCCESS on success, the status of
 *         opal_convert_string_to_jobid() in the native-launch case, or
 *         OPAL_ERR_OUT_OF_RESOURCE if a new tracker entry could not be
 *         allocated.
 */
int opal_pmix_convert_nspace(opal_jobid_t *jobid, pmix_nspace_t nspace)
{
    opal_nptr_t *nptr;
    opal_jobid_t jid;

    /* set a default */
    *jobid = OPAL_JOBID_INVALID;

    if (opal_process_info.nativelaunch) {
        return opal_convert_string_to_jobid(jobid, nspace);
    }

    /* cycle across our list of known jobids */
    OPAL_LIST_FOREACH(nptr, &localnspaces, opal_nptr_t) {
        if (PMIX_CHECK_NSPACE(nspace, nptr->nspace)) {
            *jobid = nptr->jobid;
            return OPAL_SUCCESS;
        }
    }

    /* if we get here, we don't know this nspace. Allocate the tracker
     * entry before touching *jobid so that an allocation failure does
     * not hand back a jobid that was never recorded */
    nptr = OBJ_NEW(opal_nptr_t);
    if (NULL == nptr) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    OPAL_HASH_STR(nspace, jid);
    nptr->jobid = jid;
    PMIX_LOAD_NSPACE(nptr->nspace, nspace);
    opal_list_append(&localnspaces, &nptr->super);

    *jobid = jid;
    return OPAL_SUCCESS;
}
|
||||
|
||||
pmix_status_t opal_pmix_convert_rc(int rc)
|
||||
{
|
||||
switch (rc) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2019 Research Organization for Information Science
|
||||
@ -442,9 +442,13 @@ OPAL_DECLSPEC pmix_proc_state_t opal_pmix_convert_state(int state);
|
||||
OPAL_DECLSPEC int opal_pmix_convert_pstate(pmix_proc_state_t);
|
||||
OPAL_DECLSPEC pmix_status_t opal_pmix_convert_rc(int rc);
|
||||
OPAL_DECLSPEC int opal_pmix_convert_status(pmix_status_t status);
|
||||
OPAL_DECLSPEC int opal_pmix_convert_jobid(pmix_nspace_t nspace, opal_jobid_t jobid);
|
||||
OPAL_DECLSPEC int opal_pmix_convert_nspace(opal_jobid_t *jobid, pmix_nspace_t nspace);
|
||||
OPAL_DECLSPEC void opal_pmix_setup_nspace_tracker(void);
|
||||
OPAL_DECLSPEC void opal_pmix_finalize_nspace_tracker(void);
|
||||
|
||||
#define OPAL_PMIX_CONVERT_JOBID(n, j) \
|
||||
(void)opal_snprintf_jobid((n), PMIX_MAX_NSLEN, (j))
|
||||
opal_pmix_convert_jobid((n), (j))
|
||||
|
||||
#define OPAL_PMIX_CONVERT_VPID(r, v) \
|
||||
do { \
|
||||
@ -454,6 +458,7 @@ OPAL_DECLSPEC int opal_pmix_convert_status(pmix_status_t status);
|
||||
(r) = (v); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define OPAL_PMIX_CONVERT_NAME(p, n) \
|
||||
do { \
|
||||
OPAL_PMIX_CONVERT_JOBID((p)->nspace, (n)->jobid); \
|
||||
@ -462,15 +467,17 @@ OPAL_DECLSPEC int opal_pmix_convert_status(pmix_status_t status);
|
||||
|
||||
|
||||
#define OPAL_PMIX_CONVERT_NSPACE(r, j, n) \
|
||||
(r) = opal_convert_string_to_jobid((j), (n))
|
||||
(r) = opal_pmix_convert_nspace((j), (n))
|
||||
|
||||
#define OPAL_PMIX_CONVERT_RANK(v, r) \
|
||||
do { \
|
||||
if (PMIX_RANK_WILDCARD == (r)) { \
|
||||
(v) = OPAL_VPID_WILDCARD; \
|
||||
} else { \
|
||||
(v) = (r); \
|
||||
} \
|
||||
#define OPAL_PMIX_CONVERT_RANK(v, r) \
|
||||
do { \
|
||||
if (PMIX_RANK_WILDCARD == (r)) { \
|
||||
(v) = OPAL_VPID_WILDCARD; \
|
||||
} else if (PMIX_RANK_INVALID == (r)) { \
|
||||
(v) = OPAL_VPID_INVALID; \
|
||||
} else { \
|
||||
(v) = (r); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define OPAL_PMIX_CONVERT_PROCT(r, n, p) \
|
||||
|
@ -28,6 +28,7 @@ opal_process_name_t opal_name_wildcard = {OPAL_JOBID_WILDCARD, OPAL_VPID_WILDCAR
|
||||
opal_process_name_t opal_name_invalid = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID};
|
||||
|
||||
opal_process_info_t opal_process_info = {
|
||||
.nativelaunch = false,
|
||||
.nodename = NULL,
|
||||
.top_session_dir = NULL,
|
||||
.job_session_dir = NULL,
|
||||
|
@ -23,7 +23,6 @@
|
||||
#include "opal/types.h"
|
||||
#include "opal/dss/dss.h"
|
||||
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
@ -105,6 +104,7 @@ typedef struct {
|
||||
OBJ_CLASS_DECLARATION(opal_namelist_t);
|
||||
|
||||
typedef struct opal_process_info_t {
|
||||
bool nativelaunch; /**< launched by mpirun */
|
||||
char *nodename; /**< string name for this node */
|
||||
char *top_session_dir; /**< Top-level session directory */
|
||||
char *job_session_dir; /**< Session directory for job */
|
||||
|
2
prrte
2
prrte
@ -1 +1 @@
|
||||
Subproject commit 53296629f9aae70a6cd2586c77306a499e5b573a
|
||||
Subproject commit c2e2231cc47c3df0fb3e40c130a9fecd1ca5cacf
|
Загрузка…
Ссылка в новой задаче
Block a user