Clean up the code a bit by simply adding our nspace to the top of the list of jobid <-> nspace correlations. Add two new APIs to opal_pmix for registering new jobid/nspace pairs and retrieving an nspace given a jobid - these are required to support connect/accept. No impact on the PMIx library.
Этот коммит содержится в:
родитель
f713e71d51
Коммит
a4a3dfd480
@ -139,7 +139,13 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
|||||||
opal_argv_append_nosize(&members, nstring);
|
opal_argv_append_nosize(&members, nstring);
|
||||||
free(nstring);
|
free(nstring);
|
||||||
/* have to add the number of procs in the job so the remote side
|
/* have to add the number of procs in the job so the remote side
|
||||||
* can correctly add the procs by computing their names */
|
* can correctly add the procs by computing their names, and our nspace
|
||||||
|
* so they can update their records */
|
||||||
|
if (NULL == (nstring = (char*)opal_pmix.get_nspace(OMPI_PROC_MY_NAME->jobid))) {
|
||||||
|
opal_argv_free(members);
|
||||||
|
return OMPI_ERR_NOT_SUPPORTED;
|
||||||
|
}
|
||||||
|
opal_argv_append_nosize(&members, nstring);
|
||||||
(void)asprintf(&nstring, "%d", size);
|
(void)asprintf(&nstring, "%d", size);
|
||||||
opal_argv_append_nosize(&members, nstring);
|
opal_argv_append_nosize(&members, nstring);
|
||||||
free(nstring);
|
free(nstring);
|
||||||
@ -171,6 +177,11 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
|||||||
}
|
}
|
||||||
opal_argv_append_nosize(&members, nstring);
|
opal_argv_append_nosize(&members, nstring);
|
||||||
free(nstring);
|
free(nstring);
|
||||||
|
if (NULL == (nstring = (char*)opal_pmix.get_nspace(proc_list[i]->super.proc_name.jobid))) {
|
||||||
|
opal_argv_free(members);
|
||||||
|
return OMPI_ERR_NOT_SUPPORTED;
|
||||||
|
}
|
||||||
|
opal_argv_append_nosize(&members, nstring);
|
||||||
}
|
}
|
||||||
if (!dense) {
|
if (!dense) {
|
||||||
free(proc_list);
|
free(proc_list);
|
||||||
@ -246,6 +257,17 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
|||||||
OPAL_LIST_DESTRUCT(&mlist);
|
OPAL_LIST_DESTRUCT(&mlist);
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
/* step over the nspace */
|
||||||
|
++i;
|
||||||
|
if (NULL == members[i]) {
|
||||||
|
/* this shouldn't happen and is an error */
|
||||||
|
OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM);
|
||||||
|
OPAL_LIST_DESTRUCT(&mlist);
|
||||||
|
opal_argv_free(members);
|
||||||
|
free(rport);
|
||||||
|
rc = OMPI_ERR_BAD_PARAM;
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
/* if the rank is wildcard, then we need to add all procs
|
/* if the rank is wildcard, then we need to add all procs
|
||||||
* in that job to the list */
|
* in that job to the list */
|
||||||
if (OPAL_VPID_WILDCARD == nm->name.vpid) {
|
if (OPAL_VPID_WILDCARD == nm->name.vpid) {
|
||||||
@ -295,6 +317,16 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
|||||||
OPAL_LIST_DESTRUCT(&rlist);
|
OPAL_LIST_DESTRUCT(&rlist);
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
/* next entry is the nspace - register it */
|
||||||
|
++i;
|
||||||
|
if (NULL == members[i]) {
|
||||||
|
OMPI_ERROR_LOG(OMPI_ERR_NOT_SUPPORTED);
|
||||||
|
opal_argv_free(members);
|
||||||
|
OPAL_LIST_DESTRUCT(&ilist);
|
||||||
|
OPAL_LIST_DESTRUCT(&rlist);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
opal_pmix.register_jobid(nm->name.jobid, members[i]);
|
||||||
if (OPAL_VPID_WILDCARD == nm->name.vpid) {
|
if (OPAL_VPID_WILDCARD == nm->name.vpid) {
|
||||||
jobid = nm->name.jobid;
|
jobid = nm->name.jobid;
|
||||||
OBJ_RELEASE(nm);
|
OBJ_RELEASE(nm);
|
||||||
|
@ -78,6 +78,9 @@ static int cray_unpublish_nb(char **keys, opal_list_t *info,
|
|||||||
static const char *cray_get_version(void);
|
static const char *cray_get_version(void);
|
||||||
static int cray_store_local(const opal_process_name_t *proc,
|
static int cray_store_local(const opal_process_name_t *proc,
|
||||||
opal_value_t *val);
|
opal_value_t *val);
|
||||||
|
static const char *cray_get_nspace(opal_jobid_t jobid);
|
||||||
|
static void cray_register_jobid(opal_jobid_t jobid, const char *nspace);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
static bool cray_get_attr(const char *attr, opal_value_t **kv);
|
static bool cray_get_attr(const char *attr, opal_value_t **kv);
|
||||||
#endif
|
#endif
|
||||||
@ -109,7 +112,9 @@ const opal_pmix_base_module_t opal_pmix_cray_module = {
|
|||||||
.get_version = cray_get_version,
|
.get_version = cray_get_version,
|
||||||
.register_errhandler = opal_pmix_base_register_handler,
|
.register_errhandler = opal_pmix_base_register_handler,
|
||||||
.deregister_errhandler = opal_pmix_base_deregister_handler,
|
.deregister_errhandler = opal_pmix_base_deregister_handler,
|
||||||
.store_local = cray_store_local
|
.store_local = cray_store_local,
|
||||||
|
.get_nspace = cray_get_nspace,
|
||||||
|
.register_jobid = cray_register_jobid
|
||||||
};
|
};
|
||||||
|
|
||||||
// usage accounting
|
// usage accounting
|
||||||
@ -814,6 +819,16 @@ static int cray_store_local(const opal_process_name_t *proc,
|
|||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const char *cray_get_nspace(opal_jobid_t jobid)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void cray_register_jobid(opal_jobid_t jobid, const char *nspace)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
static char* pmix_error(int pmix_err)
|
static char* pmix_error(int pmix_err)
|
||||||
{
|
{
|
||||||
char * err_msg;
|
char * err_msg;
|
||||||
|
@ -701,6 +701,12 @@ typedef void (*opal_pmix_base_module_deregister_fn_t)(void);
|
|||||||
typedef int (*opal_pmix_base_module_store_fn_t)(const opal_process_name_t *proc,
|
typedef int (*opal_pmix_base_module_store_fn_t)(const opal_process_name_t *proc,
|
||||||
opal_value_t *val);
|
opal_value_t *val);
|
||||||
|
|
||||||
|
/* retrieve the nspace corresponding to a given jobid */
|
||||||
|
typedef const char* (*opal_pmix_base_module_get_nspace_fn_t)(opal_jobid_t jobid);
|
||||||
|
|
||||||
|
/* register a jobid-to-nspace pair */
|
||||||
|
typedef void (*opal_pmix_base_module_register_jobid_fn_t)(opal_jobid_t jobid, const char *nspace);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* the standard public API data structure
|
* the standard public API data structure
|
||||||
*/
|
*/
|
||||||
@ -745,6 +751,8 @@ typedef struct {
|
|||||||
opal_pmix_base_module_register_fn_t register_errhandler;
|
opal_pmix_base_module_register_fn_t register_errhandler;
|
||||||
opal_pmix_base_module_deregister_fn_t deregister_errhandler;
|
opal_pmix_base_module_deregister_fn_t deregister_errhandler;
|
||||||
opal_pmix_base_module_store_fn_t store_local;
|
opal_pmix_base_module_store_fn_t store_local;
|
||||||
|
opal_pmix_base_module_get_nspace_fn_t get_nspace;
|
||||||
|
opal_pmix_base_module_register_jobid_fn_t register_jobid;
|
||||||
} opal_pmix_base_module_t;
|
} opal_pmix_base_module_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -30,11 +30,24 @@
|
|||||||
|
|
||||||
BEGIN_C_DECLS
|
BEGIN_C_DECLS
|
||||||
|
|
||||||
OPAL_DECLSPEC extern opal_pmix_base_component_t mca_pmix_pmix1_component;
|
typedef struct {
|
||||||
|
opal_pmix_base_component_t super;
|
||||||
|
opal_list_t jobids;
|
||||||
|
bool native_launch;
|
||||||
|
} mca_pmix_pmix1_component_t;
|
||||||
|
|
||||||
|
OPAL_DECLSPEC extern mca_pmix_pmix1_component_t mca_pmix_pmix1xx_component;
|
||||||
|
|
||||||
OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_pmix1xx_module;
|
OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_pmix1xx_module;
|
||||||
|
|
||||||
/**** INTERNAL OBJECTS ****/
|
/**** INTERNAL OBJECTS ****/
|
||||||
|
typedef struct {
|
||||||
|
opal_list_item_t super;
|
||||||
|
opal_jobid_t jobid;
|
||||||
|
char nspace[PMIX_MAX_NSLEN + 1];
|
||||||
|
} opal_pmix1_jobid_trkr_t;
|
||||||
|
OBJ_CLASS_DECLARATION(opal_pmix1_jobid_trkr_t);
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
opal_object_t super;
|
opal_object_t super;
|
||||||
pmix_proc_t p;
|
pmix_proc_t p;
|
||||||
|
@ -31,19 +31,8 @@
|
|||||||
#include "opal/mca/pmix/pmix1xx/pmix/include/pmix.h"
|
#include "opal/mca/pmix/pmix1xx/pmix/include/pmix.h"
|
||||||
#include "opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/buffer_ops.h"
|
#include "opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/buffer_ops.h"
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
opal_list_item_t super;
|
|
||||||
opal_jobid_t jobid;
|
|
||||||
char nspace[PMIX_MAX_NSLEN + 1];
|
|
||||||
} opal_pmix1_jobid_trkr_t;
|
|
||||||
static OBJ_CLASS_INSTANCE(opal_pmix1_jobid_trkr_t,
|
|
||||||
opal_list_item_t,
|
|
||||||
NULL, NULL);
|
|
||||||
|
|
||||||
static pmix_proc_t my_proc;
|
static pmix_proc_t my_proc;
|
||||||
static char *dbgvalue=NULL;
|
static char *dbgvalue=NULL;
|
||||||
static opal_list_t jobids;
|
|
||||||
static bool native_launch = false;
|
|
||||||
|
|
||||||
static void myerr(pmix_status_t status,
|
static void myerr(pmix_status_t status,
|
||||||
pmix_proc_t procs[], size_t nprocs,
|
pmix_proc_t procs[], size_t nprocs,
|
||||||
@ -87,12 +76,11 @@ int pmix1_client_init(void)
|
|||||||
opal_process_name_t pname;
|
opal_process_name_t pname;
|
||||||
pmix_status_t rc;
|
pmix_status_t rc;
|
||||||
int dbg;
|
int dbg;
|
||||||
|
opal_pmix1_jobid_trkr_t *job;
|
||||||
|
|
||||||
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
||||||
"PMIx_client init");
|
"PMIx_client init");
|
||||||
|
|
||||||
OBJ_CONSTRUCT(&jobids, opal_list_t);
|
|
||||||
|
|
||||||
if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) {
|
if (0 < (dbg = opal_output_get_verbosity(opal_pmix_base_framework.framework_output))) {
|
||||||
asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg);
|
asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg);
|
||||||
putenv(dbgvalue);
|
putenv(dbgvalue);
|
||||||
@ -106,13 +94,20 @@ int pmix1_client_init(void)
|
|||||||
if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) {
|
if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) {
|
||||||
/* if we were launched by the OMPI RTE, then
|
/* if we were launched by the OMPI RTE, then
|
||||||
* the jobid is in a special format - so get it */
|
* the jobid is in a special format - so get it */
|
||||||
native_launch = true;
|
mca_pmix_pmix1xx_component.native_launch = true;
|
||||||
opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace);
|
opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace);
|
||||||
} else {
|
} else {
|
||||||
/* we were launched by someone else, so make the
|
/* we were launched by someone else, so make the
|
||||||
* jobid just be the hash of the nspace */
|
* jobid just be the hash of the nspace */
|
||||||
OPAL_HASH_STR(my_proc.nspace, pname.jobid);
|
OPAL_HASH_STR(my_proc.nspace, pname.jobid);
|
||||||
}
|
}
|
||||||
|
/* insert this into our list of jobids - it will be the
|
||||||
|
* first, and so we'll check it first */
|
||||||
|
job = OBJ_NEW(opal_pmix1_jobid_trkr_t);
|
||||||
|
(void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
||||||
|
job->jobid = pname.jobid;
|
||||||
|
opal_list_append(&mca_pmix_pmix1xx_component.jobids, &job->super);
|
||||||
|
|
||||||
pname.vpid = my_proc.rank;
|
pname.vpid = my_proc.rank;
|
||||||
opal_proc_set_name(&pname);
|
opal_proc_set_name(&pname);
|
||||||
|
|
||||||
@ -134,8 +129,6 @@ int pmix1_client_finalize(void)
|
|||||||
|
|
||||||
rc = PMIx_Finalize();
|
rc = PMIx_Finalize();
|
||||||
|
|
||||||
OPAL_LIST_DESTRUCT(&jobids);
|
|
||||||
|
|
||||||
return pmix1_convert_rc(rc);
|
return pmix1_convert_rc(rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -157,7 +150,7 @@ int pmix1_abort(int flag, const char *msg,
|
|||||||
pmix_proc_t *parray=NULL;
|
pmix_proc_t *parray=NULL;
|
||||||
size_t n, cnt=0;
|
size_t n, cnt=0;
|
||||||
opal_namelist_t *ptr;
|
opal_namelist_t *ptr;
|
||||||
opal_pmix1_jobid_trkr_t *job;
|
opal_pmix1_jobid_trkr_t *job, *jptr;
|
||||||
|
|
||||||
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
||||||
"PMIx_client abort");
|
"PMIx_client abort");
|
||||||
@ -168,20 +161,19 @@ int pmix1_abort(int flag, const char *msg,
|
|||||||
PMIX_PROC_CREATE(parray, cnt);
|
PMIX_PROC_CREATE(parray, cnt);
|
||||||
n=0;
|
n=0;
|
||||||
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
|
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
|
||||||
/* if the jobid is my own, then we can just use
|
/* look thru our list of jobids and find the
|
||||||
* my namespace */
|
* corresponding nspace */
|
||||||
if (OPAL_PROC_MY_NAME.jobid == ptr->name.jobid) {
|
job = NULL;
|
||||||
(void)strncpy(parray[n].nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
} else {
|
if (jptr->jobid == ptr->name.jobid) {
|
||||||
/* look thru our list of jobids and find the
|
job = jptr;
|
||||||
* corresponding nspace */
|
break;
|
||||||
OPAL_LIST_FOREACH(job, &jobids, opal_pmix1_jobid_trkr_t) {
|
|
||||||
if (job->jobid == ptr->name.jobid) {
|
|
||||||
(void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (NULL == job) {
|
||||||
|
return OPAL_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
(void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
|
||||||
parray[n].rank = ptr->name.vpid;
|
parray[n].rank = ptr->name.vpid;
|
||||||
++n;
|
++n;
|
||||||
}
|
}
|
||||||
@ -201,23 +193,22 @@ int pmix1_store_local(const opal_process_name_t *proc, opal_value_t *val)
|
|||||||
pmix_value_t kv;
|
pmix_value_t kv;
|
||||||
pmix_status_t rc;
|
pmix_status_t rc;
|
||||||
pmix_proc_t p;
|
pmix_proc_t p;
|
||||||
opal_pmix1_jobid_trkr_t *job;
|
opal_pmix1_jobid_trkr_t *job, *jptr;
|
||||||
|
|
||||||
if (NULL != proc) {
|
if (NULL != proc) {
|
||||||
/* if the jobid is my own, then we can just use
|
/* look thru our list of jobids and find the
|
||||||
* my namespace */
|
* corresponding nspace */
|
||||||
if (OPAL_PROC_MY_NAME.jobid == proc->jobid) {
|
job = NULL;
|
||||||
(void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
} else {
|
if (jptr->jobid == proc->jobid) {
|
||||||
/* look thru our list of jobids and find the
|
job = jptr;
|
||||||
* corresponding nspace */
|
break;
|
||||||
OPAL_LIST_FOREACH(job, &jobids, opal_pmix1_jobid_trkr_t) {
|
|
||||||
if (job->jobid == proc->jobid) {
|
|
||||||
(void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (NULL == job) {
|
||||||
|
return OPAL_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
(void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN);
|
||||||
p.rank = proc->vpid;
|
p.rank = proc->vpid;
|
||||||
} else {
|
} else {
|
||||||
/* use our name */
|
/* use our name */
|
||||||
@ -259,7 +250,7 @@ int pmix1_fence(opal_list_t *procs, int collect_data)
|
|||||||
size_t n, cnt=0;
|
size_t n, cnt=0;
|
||||||
opal_namelist_t *ptr;
|
opal_namelist_t *ptr;
|
||||||
pmix_info_t info, *iptr;
|
pmix_info_t info, *iptr;
|
||||||
opal_pmix1_jobid_trkr_t *job;
|
opal_pmix1_jobid_trkr_t *job, *jptr;
|
||||||
|
|
||||||
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
||||||
"PMIx_client fence");
|
"PMIx_client fence");
|
||||||
@ -270,20 +261,19 @@ int pmix1_fence(opal_list_t *procs, int collect_data)
|
|||||||
PMIX_PROC_CREATE(parray, cnt);
|
PMIX_PROC_CREATE(parray, cnt);
|
||||||
n=0;
|
n=0;
|
||||||
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
|
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
|
||||||
/* if the jobid is my own, then we can just use
|
/* look thru our list of jobids and find the
|
||||||
* my namespace */
|
* corresponding nspace */
|
||||||
if (OPAL_PROC_MY_NAME.jobid == ptr->name.jobid) {
|
job = NULL;
|
||||||
(void)strncpy(parray[n].nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
} else {
|
if (jptr->jobid == ptr->name.jobid) {
|
||||||
/* look thru our list of jobids and find the
|
job = jptr;
|
||||||
* corresponding nspace */
|
break;
|
||||||
OPAL_LIST_FOREACH(job, &jobids, opal_pmix1_jobid_trkr_t) {
|
|
||||||
if (job->jobid == ptr->name.jobid) {
|
|
||||||
(void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (NULL == job) {
|
||||||
|
return OPAL_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
(void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
|
||||||
parray[n].rank = ptr->name.vpid;
|
parray[n].rank = ptr->name.vpid;
|
||||||
++n;
|
++n;
|
||||||
}
|
}
|
||||||
@ -322,7 +312,7 @@ int pmix1_fencenb(opal_list_t *procs, int collect_data,
|
|||||||
opal_namelist_t *ptr;
|
opal_namelist_t *ptr;
|
||||||
pmix1_opcaddy_t *op;
|
pmix1_opcaddy_t *op;
|
||||||
pmix_info_t info, *iptr;
|
pmix_info_t info, *iptr;
|
||||||
opal_pmix1_jobid_trkr_t *job;
|
opal_pmix1_jobid_trkr_t *job, *jptr;
|
||||||
|
|
||||||
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
||||||
"PMIx_client fence_nb");
|
"PMIx_client fence_nb");
|
||||||
@ -333,20 +323,19 @@ int pmix1_fencenb(opal_list_t *procs, int collect_data,
|
|||||||
PMIX_PROC_CREATE(parray, cnt);
|
PMIX_PROC_CREATE(parray, cnt);
|
||||||
n=0;
|
n=0;
|
||||||
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
|
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
|
||||||
/* if the jobid is my own, then we can just use
|
/* look thru our list of jobids and find the
|
||||||
* my namespace */
|
* corresponding nspace */
|
||||||
if (OPAL_PROC_MY_NAME.jobid == ptr->name.jobid) {
|
job = NULL;
|
||||||
(void)strncpy(parray[n].nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
} else {
|
if (jptr->jobid == ptr->name.jobid) {
|
||||||
/* look thru our list of jobids and find the
|
job = jptr;
|
||||||
* corresponding nspace */
|
break;
|
||||||
OPAL_LIST_FOREACH(job, &jobids, opal_pmix1_jobid_trkr_t) {
|
|
||||||
if (job->jobid == ptr->name.jobid) {
|
|
||||||
(void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (NULL == job) {
|
||||||
|
return OPAL_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
(void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
|
||||||
parray[n].rank = ptr->name.vpid;
|
parray[n].rank = ptr->name.vpid;
|
||||||
++n;
|
++n;
|
||||||
}
|
}
|
||||||
@ -406,7 +395,7 @@ int pmix1_get(const opal_process_name_t *proc, const char *key,
|
|||||||
size_t ninfo, n;
|
size_t ninfo, n;
|
||||||
pmix_info_t *pinfo;
|
pmix_info_t *pinfo;
|
||||||
opal_value_t *ival;
|
opal_value_t *ival;
|
||||||
opal_pmix1_jobid_trkr_t *job;
|
opal_pmix1_jobid_trkr_t *job, *jptr;
|
||||||
|
|
||||||
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
||||||
"%s PMIx_client get on proc %s key %s",
|
"%s PMIx_client get on proc %s key %s",
|
||||||
@ -416,20 +405,19 @@ int pmix1_get(const opal_process_name_t *proc, const char *key,
|
|||||||
/* prep default response */
|
/* prep default response */
|
||||||
*val = NULL;
|
*val = NULL;
|
||||||
if (NULL != proc) {
|
if (NULL != proc) {
|
||||||
/* if the jobid is my own, then we can just use
|
/* look thru our list of jobids and find the
|
||||||
* my namespace */
|
* corresponding nspace */
|
||||||
if (OPAL_PROC_MY_NAME.jobid == proc->jobid) {
|
job = NULL;
|
||||||
(void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
} else {
|
if (jptr->jobid == proc->jobid) {
|
||||||
/* look thru our list of jobids and find the
|
job = jptr;
|
||||||
* corresponding nspace */
|
break;
|
||||||
OPAL_LIST_FOREACH(job, &jobids, opal_pmix1_jobid_trkr_t) {
|
|
||||||
if (job->jobid == proc->jobid) {
|
|
||||||
(void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (NULL == job) {
|
||||||
|
return OPAL_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
(void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN);
|
||||||
p.rank = proc->vpid;
|
p.rank = proc->vpid;
|
||||||
pptr = &p;
|
pptr = &p;
|
||||||
} else {
|
} else {
|
||||||
@ -509,7 +497,7 @@ int pmix1_getnb(const opal_process_name_t *proc, const char *key,
|
|||||||
pmix_status_t rc;
|
pmix_status_t rc;
|
||||||
size_t n;
|
size_t n;
|
||||||
opal_value_t *ival;
|
opal_value_t *ival;
|
||||||
opal_pmix1_jobid_trkr_t *job;
|
opal_pmix1_jobid_trkr_t *job, *jptr;
|
||||||
|
|
||||||
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
||||||
"%s PMIx_client get_nb on proc %s key %s",
|
"%s PMIx_client get_nb on proc %s key %s",
|
||||||
@ -522,20 +510,19 @@ int pmix1_getnb(const opal_process_name_t *proc, const char *key,
|
|||||||
op->cbdata = cbdata;
|
op->cbdata = cbdata;
|
||||||
|
|
||||||
if (NULL != proc) {
|
if (NULL != proc) {
|
||||||
/* if the jobid is my own, then we can just use
|
/* look thru our list of jobids and find the
|
||||||
* my namespace */
|
* corresponding nspace */
|
||||||
if (OPAL_PROC_MY_NAME.jobid == proc->jobid) {
|
job = NULL;
|
||||||
(void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
} else {
|
if (jptr->jobid == proc->jobid) {
|
||||||
/* look thru our list of jobids and find the
|
job = jptr;
|
||||||
* corresponding nspace */
|
break;
|
||||||
OPAL_LIST_FOREACH(job, &jobids, opal_pmix1_jobid_trkr_t) {
|
|
||||||
if (job->jobid == proc->jobid) {
|
|
||||||
(void)strncpy(op->p.nspace, job->nspace, PMIX_MAX_NSLEN);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (NULL == job) {
|
||||||
|
return OPAL_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
(void)strncpy(op->p.nspace, job->nspace, PMIX_MAX_NSLEN);
|
||||||
op->p.rank = proc->vpid;
|
op->p.rank = proc->vpid;
|
||||||
} else {
|
} else {
|
||||||
(void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
(void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
||||||
@ -640,6 +627,7 @@ int pmix1_lookup(opal_list_t *data, opal_list_t *info)
|
|||||||
pmix_status_t ret;
|
pmix_status_t ret;
|
||||||
opal_pmix_pdata_t *d;
|
opal_pmix_pdata_t *d;
|
||||||
opal_value_t *iptr;
|
opal_value_t *iptr;
|
||||||
|
opal_pmix1_jobid_trkr_t *job, *jptr;
|
||||||
|
|
||||||
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
||||||
"PMIx_client lookup");
|
"PMIx_client lookup");
|
||||||
@ -676,7 +664,7 @@ int pmix1_lookup(opal_list_t *data, opal_list_t *info)
|
|||||||
/* transfer the data back */
|
/* transfer the data back */
|
||||||
n=0;
|
n=0;
|
||||||
OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) {
|
OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) {
|
||||||
if (native_launch) {
|
if (mca_pmix_pmix1xx_component.native_launch) {
|
||||||
/* if we were launched by the OMPI RTE, then
|
/* if we were launched by the OMPI RTE, then
|
||||||
* the jobid is in a special format - so get it */
|
* the jobid is in a special format - so get it */
|
||||||
opal_convert_string_to_jobid(&d->proc.jobid, pdata[n].proc.nspace);
|
opal_convert_string_to_jobid(&d->proc.jobid, pdata[n].proc.nspace);
|
||||||
@ -685,6 +673,20 @@ int pmix1_lookup(opal_list_t *data, opal_list_t *info)
|
|||||||
* jobid just be the hash of the nspace */
|
* jobid just be the hash of the nspace */
|
||||||
OPAL_HASH_STR(pdata[n].proc.nspace, d->proc.jobid);
|
OPAL_HASH_STR(pdata[n].proc.nspace, d->proc.jobid);
|
||||||
}
|
}
|
||||||
|
/* if we don't already have it, add this to our jobid tracker */
|
||||||
|
job = NULL;
|
||||||
|
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
|
if (jptr->jobid == d->proc.jobid) {
|
||||||
|
job = jptr;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (NULL == job) {
|
||||||
|
job = OBJ_NEW(opal_pmix1_jobid_trkr_t);
|
||||||
|
(void)strncpy(job->nspace, pdata[n].proc.nspace, PMIX_MAX_NSLEN);
|
||||||
|
job->jobid = d->proc.jobid;
|
||||||
|
opal_list_append(&mca_pmix_pmix1xx_component.jobids, &job->super);
|
||||||
|
}
|
||||||
if (PMIX_RANK_WILDCARD == pdata[n].proc.rank) {
|
if (PMIX_RANK_WILDCARD == pdata[n].proc.rank) {
|
||||||
d->proc.vpid = OPAL_VPID_WILDCARD;
|
d->proc.vpid = OPAL_VPID_WILDCARD;
|
||||||
} else {
|
} else {
|
||||||
@ -712,6 +714,7 @@ static void lk_cbfunc(pmix_status_t status,
|
|||||||
opal_list_t results, *r;
|
opal_list_t results, *r;
|
||||||
int rc;
|
int rc;
|
||||||
size_t n;
|
size_t n;
|
||||||
|
opal_pmix1_jobid_trkr_t *job, *jptr;
|
||||||
|
|
||||||
if (NULL == op->lkcbfunc) {
|
if (NULL == op->lkcbfunc) {
|
||||||
OBJ_RELEASE(op);
|
OBJ_RELEASE(op);
|
||||||
@ -724,7 +727,7 @@ static void lk_cbfunc(pmix_status_t status,
|
|||||||
for (n=0; n < ndata; n++) {
|
for (n=0; n < ndata; n++) {
|
||||||
d = OBJ_NEW(opal_pmix_pdata_t);
|
d = OBJ_NEW(opal_pmix_pdata_t);
|
||||||
opal_list_append(&results, &d->super);
|
opal_list_append(&results, &d->super);
|
||||||
if (native_launch) {
|
if (mca_pmix_pmix1xx_component.native_launch) {
|
||||||
/* if we were launched by the OMPI RTE, then
|
/* if we were launched by the OMPI RTE, then
|
||||||
* the jobid is in a special format - so get it */
|
* the jobid is in a special format - so get it */
|
||||||
opal_convert_string_to_jobid(&d->proc.jobid, data[n].proc.nspace);
|
opal_convert_string_to_jobid(&d->proc.jobid, data[n].proc.nspace);
|
||||||
@ -733,6 +736,20 @@ static void lk_cbfunc(pmix_status_t status,
|
|||||||
* jobid just be the hash of the nspace */
|
* jobid just be the hash of the nspace */
|
||||||
OPAL_HASH_STR(data[n].proc.nspace, d->proc.jobid);
|
OPAL_HASH_STR(data[n].proc.nspace, d->proc.jobid);
|
||||||
}
|
}
|
||||||
|
/* if we don't already have it, add this to our jobid tracker */
|
||||||
|
job = NULL;
|
||||||
|
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
|
if (jptr->jobid == d->proc.jobid) {
|
||||||
|
job = jptr;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (NULL == job) {
|
||||||
|
job = OBJ_NEW(opal_pmix1_jobid_trkr_t);
|
||||||
|
(void)strncpy(job->nspace, data[n].proc.nspace, PMIX_MAX_NSLEN);
|
||||||
|
job->jobid = d->proc.jobid;
|
||||||
|
opal_list_append(&mca_pmix_pmix1xx_component.jobids, &job->super);
|
||||||
|
}
|
||||||
if (PMIX_RANK_WILDCARD == data[n].proc.rank) {
|
if (PMIX_RANK_WILDCARD == data[n].proc.rank) {
|
||||||
d->proc.vpid = OPAL_VPID_WILDCARD;
|
d->proc.vpid = OPAL_VPID_WILDCARD;
|
||||||
} else {
|
} else {
|
||||||
@ -898,7 +915,7 @@ int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid)
|
|||||||
|
|
||||||
ret = PMIx_Spawn(pinfo, ninfo, papps, napps, nspace);
|
ret = PMIx_Spawn(pinfo, ninfo, papps, napps, nspace);
|
||||||
if (PMIX_SUCCESS == ret) {
|
if (PMIX_SUCCESS == ret) {
|
||||||
if (native_launch) {
|
if (mca_pmix_pmix1xx_component.native_launch) {
|
||||||
/* if we were launched by the OMPI RTE, then
|
/* if we were launched by the OMPI RTE, then
|
||||||
* the jobid is in a special format - so get it */
|
* the jobid is in a special format - so get it */
|
||||||
opal_convert_string_to_jobid(jobid, nspace);
|
opal_convert_string_to_jobid(jobid, nspace);
|
||||||
@ -906,12 +923,12 @@ int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid)
|
|||||||
/* we were launched by someone else, so make the
|
/* we were launched by someone else, so make the
|
||||||
* jobid just be the hash of the nspace */
|
* jobid just be the hash of the nspace */
|
||||||
OPAL_HASH_STR(nspace, *jobid);
|
OPAL_HASH_STR(nspace, *jobid);
|
||||||
/* add this to our jobid tracker */
|
|
||||||
job = OBJ_NEW(opal_pmix1_jobid_trkr_t);
|
|
||||||
(void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN);
|
|
||||||
job->jobid = *jobid;
|
|
||||||
opal_list_append(&jobids, &job->super);
|
|
||||||
}
|
}
|
||||||
|
/* add this to our jobid tracker */
|
||||||
|
job = OBJ_NEW(opal_pmix1_jobid_trkr_t);
|
||||||
|
(void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN);
|
||||||
|
job->jobid = *jobid;
|
||||||
|
opal_list_append(&mca_pmix_pmix1xx_component.jobids, &job->super);
|
||||||
}
|
}
|
||||||
PMIX_APP_FREE(papps, napps);
|
PMIX_APP_FREE(papps, napps);
|
||||||
|
|
||||||
@ -928,7 +945,7 @@ static void spcbfunc(pmix_status_t status,
|
|||||||
|
|
||||||
rc = pmix1_convert_rc(status);
|
rc = pmix1_convert_rc(status);
|
||||||
if (PMIX_SUCCESS == status) {
|
if (PMIX_SUCCESS == status) {
|
||||||
if (native_launch) {
|
if (mca_pmix_pmix1xx_component.native_launch) {
|
||||||
/* if we were launched by the OMPI RTE, then
|
/* if we were launched by the OMPI RTE, then
|
||||||
* the jobid is in a special format - so get it */
|
* the jobid is in a special format - so get it */
|
||||||
opal_convert_string_to_jobid(&jobid, nspace);
|
opal_convert_string_to_jobid(&jobid, nspace);
|
||||||
@ -936,12 +953,12 @@ static void spcbfunc(pmix_status_t status,
|
|||||||
/* we were launched by someone else, so make the
|
/* we were launched by someone else, so make the
|
||||||
* jobid just be the hash of the nspace */
|
* jobid just be the hash of the nspace */
|
||||||
OPAL_HASH_STR(nspace, jobid);
|
OPAL_HASH_STR(nspace, jobid);
|
||||||
/* add this to our jobid tracker */
|
|
||||||
job = OBJ_NEW(opal_pmix1_jobid_trkr_t);
|
|
||||||
(void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN);
|
|
||||||
job->jobid = jobid;
|
|
||||||
opal_list_append(&jobids, &job->super);
|
|
||||||
}
|
}
|
||||||
|
/* add this to our jobid tracker */
|
||||||
|
job = OBJ_NEW(opal_pmix1_jobid_trkr_t);
|
||||||
|
(void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN);
|
||||||
|
job->jobid = jobid;
|
||||||
|
opal_list_append(&mca_pmix_pmix1xx_component.jobids, &job->super);
|
||||||
}
|
}
|
||||||
|
|
||||||
op->spcbfunc(rc, jobid, op->cbdata);
|
op->spcbfunc(rc, jobid, op->cbdata);
|
||||||
@ -1004,7 +1021,7 @@ int pmix1_connect(opal_list_t *procs)
|
|||||||
pmix_proc_t *parray=NULL;
|
pmix_proc_t *parray=NULL;
|
||||||
size_t n, cnt=0;
|
size_t n, cnt=0;
|
||||||
opal_namelist_t *ptr;
|
opal_namelist_t *ptr;
|
||||||
opal_pmix1_jobid_trkr_t *job;
|
opal_pmix1_jobid_trkr_t *job, *jptr;
|
||||||
|
|
||||||
/* protect against bozo error */
|
/* protect against bozo error */
|
||||||
if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) {
|
if (NULL == procs || 0 == (cnt = opal_list_get_size(procs))) {
|
||||||
@ -1016,20 +1033,20 @@ int pmix1_connect(opal_list_t *procs)
|
|||||||
PMIX_PROC_CREATE(parray, cnt);
|
PMIX_PROC_CREATE(parray, cnt);
|
||||||
n=0;
|
n=0;
|
||||||
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
|
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
|
||||||
/* if the jobid is my own, then we can just use
|
/* look thru our list of jobids and find the
|
||||||
* my namespace */
|
* corresponding nspace */
|
||||||
if (OPAL_PROC_MY_NAME.jobid == ptr->name.jobid) {
|
job = NULL;
|
||||||
(void)strncpy(parray[n].nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
} else {
|
if (jptr->jobid == ptr->name.jobid) {
|
||||||
/* look thru our list of jobids and find the
|
job = jptr;
|
||||||
* corresponding nspace */
|
break;
|
||||||
OPAL_LIST_FOREACH(job, &jobids, opal_pmix1_jobid_trkr_t) {
|
|
||||||
if (job->jobid == ptr->name.jobid) {
|
|
||||||
(void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (NULL == job) {
|
||||||
|
OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND);
|
||||||
|
return OPAL_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
(void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
|
||||||
if (OPAL_VPID_WILDCARD == ptr->name.vpid) {
|
if (OPAL_VPID_WILDCARD == ptr->name.vpid) {
|
||||||
parray[n].rank = PMIX_RANK_WILDCARD;
|
parray[n].rank = PMIX_RANK_WILDCARD;
|
||||||
} else {
|
} else {
|
||||||
@ -1070,18 +1087,12 @@ int pmix1_connectnb(opal_list_t *procs,
|
|||||||
PMIX_PROC_CREATE(op->procs, op->nprocs);
|
PMIX_PROC_CREATE(op->procs, op->nprocs);
|
||||||
n=0;
|
n=0;
|
||||||
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
|
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
|
||||||
/* if the jobid is my own, then we can just use
|
/* look thru our list of jobids and find the
|
||||||
* my namespace */
|
* corresponding nspace */
|
||||||
if (OPAL_PROC_MY_NAME.jobid == ptr->name.jobid) {
|
OPAL_LIST_FOREACH(job, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
(void)strncpy(op->procs[n].nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
if (job->jobid == ptr->name.jobid) {
|
||||||
} else {
|
(void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN);
|
||||||
/* look thru our list of jobids and find the
|
break;
|
||||||
* corresponding nspace */
|
|
||||||
OPAL_LIST_FOREACH(job, &jobids, opal_pmix1_jobid_trkr_t) {
|
|
||||||
if (job->jobid == ptr->name.jobid) {
|
|
||||||
(void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (OPAL_VPID_WILDCARD == ptr->name.vpid) {
|
if (OPAL_VPID_WILDCARD == ptr->name.vpid) {
|
||||||
@ -1115,18 +1126,12 @@ int pmix1_disconnect(opal_list_t *procs)
|
|||||||
PMIX_PROC_CREATE(parray, cnt);
|
PMIX_PROC_CREATE(parray, cnt);
|
||||||
n=0;
|
n=0;
|
||||||
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
|
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
|
||||||
/* if the jobid is my own, then we can just use
|
/* look thru our list of jobids and find the
|
||||||
* my namespace */
|
* corresponding nspace */
|
||||||
if (OPAL_PROC_MY_NAME.jobid == ptr->name.jobid) {
|
OPAL_LIST_FOREACH(job, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
(void)strncpy(parray[n].nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
if (job->jobid == ptr->name.jobid) {
|
||||||
} else {
|
(void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
|
||||||
/* look thru our list of jobids and find the
|
break;
|
||||||
* corresponding nspace */
|
|
||||||
OPAL_LIST_FOREACH(job, &jobids, opal_pmix1_jobid_trkr_t) {
|
|
||||||
if (job->jobid == ptr->name.jobid) {
|
|
||||||
(void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (OPAL_VPID_WILDCARD == ptr->name.vpid) {
|
if (OPAL_VPID_WILDCARD == ptr->name.vpid) {
|
||||||
@ -1169,18 +1174,12 @@ int pmix1_disconnectnb(opal_list_t *procs,
|
|||||||
PMIX_PROC_CREATE(op->procs, op->nprocs);
|
PMIX_PROC_CREATE(op->procs, op->nprocs);
|
||||||
n=0;
|
n=0;
|
||||||
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
|
OPAL_LIST_FOREACH(ptr, procs, opal_namelist_t) {
|
||||||
/* if the jobid is my own, then we can just use
|
/* look thru our list of jobids and find the
|
||||||
* my namespace */
|
* corresponding nspace */
|
||||||
if (OPAL_PROC_MY_NAME.jobid == ptr->name.jobid) {
|
OPAL_LIST_FOREACH(job, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
(void)strncpy(op->procs[n].nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
if (job->jobid == ptr->name.jobid) {
|
||||||
} else {
|
(void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN);
|
||||||
/* look thru our list of jobids and find the
|
break;
|
||||||
* corresponding nspace */
|
|
||||||
OPAL_LIST_FOREACH(job, &jobids, opal_pmix1_jobid_trkr_t) {
|
|
||||||
if (job->jobid == ptr->name.jobid) {
|
|
||||||
(void)strncpy(op->procs[n].nspace, job->nspace, PMIX_MAX_NSLEN);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (OPAL_VPID_WILDCARD == ptr->name.vpid) {
|
if (OPAL_VPID_WILDCARD == ptr->name.vpid) {
|
||||||
@ -1206,24 +1205,32 @@ int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid,
|
|||||||
opal_namelist_t *nm;
|
opal_namelist_t *nm;
|
||||||
int rc;
|
int rc;
|
||||||
pmix_status_t ret;
|
pmix_status_t ret;
|
||||||
|
opal_pmix1_jobid_trkr_t *job, *jptr;
|
||||||
|
|
||||||
if (OPAL_JOBID_WILDCARD == jobid) {
|
if (OPAL_JOBID_WILDCARD == jobid) {
|
||||||
nspace = NULL;
|
nspace = NULL;
|
||||||
} else {
|
} else {
|
||||||
nspace = opal_convert_jobid_to_string(jobid);
|
job = NULL;
|
||||||
|
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
|
if (jptr->jobid == jobid) {
|
||||||
|
job = jptr;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (NULL == job) {
|
||||||
|
return OPAL_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
nspace = job->nspace;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = PMIx_Resolve_peers(nodename, nspace, &array, &nprocs);
|
ret = PMIx_Resolve_peers(nodename, nspace, &array, &nprocs);
|
||||||
if (NULL != nspace) {
|
|
||||||
free(nspace);
|
|
||||||
}
|
|
||||||
rc = pmix1_convert_rc(ret);
|
rc = pmix1_convert_rc(ret);
|
||||||
|
|
||||||
if (NULL != array && 0 < nprocs) {
|
if (NULL != array && 0 < nprocs) {
|
||||||
for (n=0; n < nprocs; n++) {
|
for (n=0; n < nprocs; n++) {
|
||||||
nm = OBJ_NEW(opal_namelist_t);
|
nm = OBJ_NEW(opal_namelist_t);
|
||||||
opal_list_append(procs, &nm->super);
|
opal_list_append(procs, &nm->super);
|
||||||
if (native_launch) {
|
if (mca_pmix_pmix1xx_component.native_launch) {
|
||||||
/* if we were launched by the OMPI RTE, then
|
/* if we were launched by the OMPI RTE, then
|
||||||
* the jobid is in a special format - so get it */
|
* the jobid is in a special format - so get it */
|
||||||
opal_convert_string_to_jobid(&nm->name.jobid, array[n].nspace);
|
opal_convert_string_to_jobid(&nm->name.jobid, array[n].nspace);
|
||||||
@ -1232,6 +1239,20 @@ int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid,
|
|||||||
* jobid just be the hash of the nspace */
|
* jobid just be the hash of the nspace */
|
||||||
OPAL_HASH_STR(array[n].nspace, nm->name.jobid);
|
OPAL_HASH_STR(array[n].nspace, nm->name.jobid);
|
||||||
}
|
}
|
||||||
|
/* if we don't already have it, add this to our jobid tracker */
|
||||||
|
job = NULL;
|
||||||
|
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
|
if (jptr->jobid == nm->name.jobid) {
|
||||||
|
job = jptr;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (NULL == job) {
|
||||||
|
job = OBJ_NEW(opal_pmix1_jobid_trkr_t);
|
||||||
|
(void)strncpy(job->nspace, nspace, PMIX_MAX_NSLEN);
|
||||||
|
job->jobid = jobid;
|
||||||
|
opal_list_append(&mca_pmix_pmix1xx_component.jobids, &job->super);
|
||||||
|
}
|
||||||
nm->name.vpid = array[n].rank;
|
nm->name.vpid = array[n].rank;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1244,29 +1265,25 @@ int pmix1_resolve_nodes(opal_jobid_t jobid, char **nodelist)
|
|||||||
{
|
{
|
||||||
pmix_status_t ret;
|
pmix_status_t ret;
|
||||||
char *nspace=NULL;
|
char *nspace=NULL;
|
||||||
opal_pmix1_jobid_trkr_t *job;
|
opal_pmix1_jobid_trkr_t *job, *jptr;
|
||||||
|
|
||||||
if (OPAL_JOBID_WILDCARD != jobid) {
|
if (OPAL_JOBID_WILDCARD != jobid) {
|
||||||
/* if the jobid is my own, then we can just use
|
/* look thru our list of jobids and find the
|
||||||
* my namespace */
|
* corresponding nspace */
|
||||||
if (OPAL_PROC_MY_NAME.jobid == jobid) {
|
job = NULL;
|
||||||
nspace = strdup(my_proc.nspace);
|
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
} else {
|
if (jptr->jobid == jobid) {
|
||||||
/* look thru our list of jobids and find the
|
job = jptr;
|
||||||
* corresponding nspace */
|
break;
|
||||||
OPAL_LIST_FOREACH(job, &jobids, opal_pmix1_jobid_trkr_t) {
|
|
||||||
if (job->jobid == jobid) {
|
|
||||||
nspace = strdup(job->nspace);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (NULL == job) {
|
||||||
|
return OPAL_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
nspace = job->nspace;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = PMIx_Resolve_nodes(nspace, nodelist);
|
ret = PMIx_Resolve_nodes(nspace, nodelist);
|
||||||
if (NULL != nspace) {
|
|
||||||
free(nspace);
|
|
||||||
}
|
|
||||||
|
|
||||||
return pmix1_convert_rc(ret);;
|
return pmix1_convert_rc(ret);;
|
||||||
}
|
}
|
||||||
|
@ -44,6 +44,8 @@
|
|||||||
/* These are functions used by both client and server to
|
/* These are functions used by both client and server to
|
||||||
* access common functions in the embedded PMIx library */
|
* access common functions in the embedded PMIx library */
|
||||||
|
|
||||||
|
static const char *pmix1_get_nspace(opal_jobid_t jobid);
|
||||||
|
static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace);
|
||||||
|
|
||||||
const opal_pmix_base_module_t opal_pmix_pmix1xx_module = {
|
const opal_pmix_base_module_t opal_pmix_pmix1xx_module = {
|
||||||
/* client APIs */
|
/* client APIs */
|
||||||
@ -85,9 +87,39 @@ const opal_pmix_base_module_t opal_pmix_pmix1xx_module = {
|
|||||||
PMIx_Get_version,
|
PMIx_Get_version,
|
||||||
opal_pmix_base_register_handler,
|
opal_pmix_base_register_handler,
|
||||||
opal_pmix_base_deregister_handler,
|
opal_pmix_base_deregister_handler,
|
||||||
pmix1_store_local
|
pmix1_store_local,
|
||||||
|
pmix1_get_nspace,
|
||||||
|
pmix1_register_jobid
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const char *pmix1_get_nspace(opal_jobid_t jobid)
|
||||||
|
{
|
||||||
|
opal_pmix1_jobid_trkr_t *jptr;
|
||||||
|
|
||||||
|
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
|
if (jptr->jobid == jobid) {
|
||||||
|
return jptr->nspace;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void pmix1_register_jobid(opal_jobid_t jobid, const char *nspace)
|
||||||
|
{
|
||||||
|
opal_pmix1_jobid_trkr_t *jptr;
|
||||||
|
|
||||||
|
/* if we don't already have it, add this to our jobid tracker */
|
||||||
|
OPAL_LIST_FOREACH(jptr, &mca_pmix_pmix1xx_component.jobids, opal_pmix1_jobid_trkr_t) {
|
||||||
|
if (jptr->jobid == jobid) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
jptr = OBJ_NEW(opal_pmix1_jobid_trkr_t);
|
||||||
|
(void)strncpy(jptr->nspace, nspace, PMIX_MAX_NSLEN);
|
||||||
|
jptr->jobid = jobid;
|
||||||
|
opal_list_append(&mca_pmix_pmix1xx_component.jobids, &jptr->super);
|
||||||
|
}
|
||||||
|
|
||||||
pmix_status_t pmix1_convert_opalrc(int rc)
|
pmix_status_t pmix1_convert_opalrc(int rc)
|
||||||
{
|
{
|
||||||
switch (rc) {
|
switch (rc) {
|
||||||
@ -436,6 +468,10 @@ int pmix1_value_unload(opal_value_t *kv,
|
|||||||
|
|
||||||
|
|
||||||
/**** INSTANTIATE INTERNAL CLASSES ****/
|
/**** INSTANTIATE INTERNAL CLASSES ****/
|
||||||
|
OBJ_CLASS_INSTANCE(opal_pmix1_jobid_trkr_t,
|
||||||
|
opal_list_item_t,
|
||||||
|
NULL, NULL);
|
||||||
|
|
||||||
static void opcon(pmix1_opcaddy_t *p)
|
static void opcon(pmix1_opcaddy_t *p)
|
||||||
{
|
{
|
||||||
memset(&p->p, 0, sizeof(pmix_proc_t));
|
memset(&p->p, 0, sizeof(pmix_proc_t));
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#include "opal_config.h"
|
#include "opal_config.h"
|
||||||
|
|
||||||
#include "opal/constants.h"
|
#include "opal/constants.h"
|
||||||
|
#include "opal/class/opal_list.h"
|
||||||
#include "opal/util/proc.h"
|
#include "opal/util/proc.h"
|
||||||
#include "opal/mca/pmix/pmix.h"
|
#include "opal/mca/pmix/pmix.h"
|
||||||
#include "pmix1.h"
|
#include "pmix1.h"
|
||||||
@ -41,43 +42,47 @@ static int pmix1xx_component_query(mca_base_module_t **module, int *priority);
|
|||||||
* and pointers to our public functions in it
|
* and pointers to our public functions in it
|
||||||
*/
|
*/
|
||||||
|
|
||||||
opal_pmix_base_component_t mca_pmix_pmix1xx_component = {
|
mca_pmix_pmix1_component_t mca_pmix_pmix1xx_component = {
|
||||||
|
{
|
||||||
/* First, the mca_component_t struct containing meta information
|
/* First, the mca_component_t struct containing meta information
|
||||||
about the component itself */
|
about the component itself */
|
||||||
|
|
||||||
.base_version = {
|
.base_version = {
|
||||||
/* Indicate that we are a pmix v1.1.0 component (which also
|
/* Indicate that we are a pmix v1.1.0 component (which also
|
||||||
implies a specific MCA version) */
|
implies a specific MCA version) */
|
||||||
|
|
||||||
OPAL_PMIX_BASE_VERSION_2_0_0,
|
OPAL_PMIX_BASE_VERSION_2_0_0,
|
||||||
|
|
||||||
/* Component name and version */
|
/* Component name and version */
|
||||||
|
|
||||||
.mca_component_name = "pmix1xx",
|
.mca_component_name = "pmix1xx",
|
||||||
MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
|
MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
|
||||||
OPAL_RELEASE_VERSION),
|
OPAL_RELEASE_VERSION),
|
||||||
|
|
||||||
/* Component open and close functions */
|
/* Component open and close functions */
|
||||||
|
|
||||||
.mca_open_component = pmix1xx_open,
|
.mca_open_component = pmix1xx_open,
|
||||||
.mca_close_component = pmix1xx_close,
|
.mca_close_component = pmix1xx_close,
|
||||||
.mca_query_component = pmix1xx_component_query,
|
.mca_query_component = pmix1xx_component_query,
|
||||||
},
|
},
|
||||||
/* Next the MCA v1.0.0 component meta data */
|
/* Next the MCA v1.0.0 component meta data */
|
||||||
.base_data = {
|
.base_data = {
|
||||||
/* The component is checkpoint ready */
|
/* The component is checkpoint ready */
|
||||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
.native_launch = false
|
||||||
};
|
};
|
||||||
|
|
||||||
static int pmix1xx_open(void)
|
static int pmix1xx_open(void)
|
||||||
{
|
{
|
||||||
|
OBJ_CONSTRUCT(&mca_pmix_pmix1xx_component.jobids, opal_list_t);
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int pmix1xx_close(void)
|
static int pmix1xx_close(void)
|
||||||
{
|
{
|
||||||
|
OPAL_LIST_DESTRUCT(&mca_pmix_pmix1xx_component.jobids);
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -49,6 +49,8 @@ static int s1_job_connect(opal_list_t *procs);
|
|||||||
static int s1_job_disconnect(opal_list_t *procs);
|
static int s1_job_disconnect(opal_list_t *procs);
|
||||||
static int s1_store_local(const opal_process_name_t *proc,
|
static int s1_store_local(const opal_process_name_t *proc,
|
||||||
opal_value_t *val);
|
opal_value_t *val);
|
||||||
|
static const char *s1_get_nspace(opal_jobid_t jobid);
|
||||||
|
static void s1_register_jobid(opal_jobid_t jobid, const char *nspace);
|
||||||
|
|
||||||
const opal_pmix_base_module_t opal_pmix_s1_module = {
|
const opal_pmix_base_module_t opal_pmix_s1_module = {
|
||||||
s1_init,
|
s1_init,
|
||||||
@ -89,7 +91,9 @@ const opal_pmix_base_module_t opal_pmix_s1_module = {
|
|||||||
NULL,
|
NULL,
|
||||||
opal_pmix_base_register_handler,
|
opal_pmix_base_register_handler,
|
||||||
opal_pmix_base_deregister_handler,
|
opal_pmix_base_deregister_handler,
|
||||||
s1_store_local
|
s1_store_local,
|
||||||
|
s1_get_nspace,
|
||||||
|
s1_register_jobid
|
||||||
};
|
};
|
||||||
|
|
||||||
// usage accounting
|
// usage accounting
|
||||||
@ -644,6 +648,14 @@ static int s1_store_local(const opal_process_name_t *proc,
|
|||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const char *s1_get_nspace(opal_jobid_t jobid)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
static void s1_register_jobid(opal_jobid_t jobid, const char *nspace)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
static char* pmix_error(int pmix_err)
|
static char* pmix_error(int pmix_err)
|
||||||
{
|
{
|
||||||
|
@ -56,6 +56,8 @@ static int s2_job_connect(opal_list_t *procs);
|
|||||||
static int s2_job_disconnect(opal_list_t *procs);
|
static int s2_job_disconnect(opal_list_t *procs);
|
||||||
static int s2_store_local(const opal_process_name_t *proc,
|
static int s2_store_local(const opal_process_name_t *proc,
|
||||||
opal_value_t *val);
|
opal_value_t *val);
|
||||||
|
static const char *s2_get_nspace(opal_jobid_t jobid);
|
||||||
|
static void s2_register_jobid(opal_jobid_t jobid, const char *nspace);
|
||||||
|
|
||||||
const opal_pmix_base_module_t opal_pmix_s2_module = {
|
const opal_pmix_base_module_t opal_pmix_s2_module = {
|
||||||
s2_init,
|
s2_init,
|
||||||
@ -96,7 +98,9 @@ const opal_pmix_base_module_t opal_pmix_s2_module = {
|
|||||||
NULL,
|
NULL,
|
||||||
opal_pmix_base_register_handler,
|
opal_pmix_base_register_handler,
|
||||||
opal_pmix_base_deregister_handler,
|
opal_pmix_base_deregister_handler,
|
||||||
s2_store_local
|
s2_store_local,
|
||||||
|
s2_get_nspace,
|
||||||
|
s2_register_jobid
|
||||||
};
|
};
|
||||||
|
|
||||||
// usage accounting
|
// usage accounting
|
||||||
@ -663,6 +667,14 @@ static int s2_store_local(const opal_process_name_t *proc,
|
|||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const char *s2_get_nspace(opal_jobid_t jobid)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
static void s2_register_jobid(opal_jobid_t jobid, const char *nspace)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
static char* pmix_error(int pmix_err)
|
static char* pmix_error(int pmix_err)
|
||||||
{
|
{
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user