Fix the orte-dvm operations so that orterun can connect and execute an application. One known problem remains: the first invocation of orterun succeeds every time, but subsequent invocations have a high probability of hanging in the OOB connection handshake.
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
2e23fba5c4
Коммит
68029b27e4
@ -124,12 +124,15 @@ typedef uint32_t pmix_rank_t;
|
||||
// client rendezvous points and contact info
|
||||
#define PMIX_SYSTEM_TMPDIR "pmix.sys.tmpdir" // (char*) temp directory for this system, where PMIx
|
||||
// server will place tool rendezvous points and contact info
|
||||
#define PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data
|
||||
#define PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server
|
||||
#define PMIX_SERVER_NSPACE "pmix.srv.nspace" // (char*) Name of the nspace to use for this server
|
||||
#define PMIX_SERVER_RANK "pmix.srv.rank" // (pmix_rank_t) Rank of this server
|
||||
|
||||
|
||||
/* tool-related attributes */
|
||||
#define PMIX_TOOL_NSPACE "pmix.tool.nspace" // (char*) Name of the nspace to use for this tool
|
||||
#define PMIX_TOOL_RANK "pmix.tool.rank" // (pmix_rank_t) Rank of this tool
|
||||
#define PMIX_SERVER_PIDINFO "pmix.srvr.pidinfo" // (pid_t) pid of the target server for a tool
|
||||
#define PMIX_CONNECT_TO_SYSTEM "pmix.cnct.sys" // (bool) The requestor requires that a connection be made only to
|
||||
// a local system-level PMIx server
|
||||
@ -138,7 +141,8 @@ typedef uint32_t pmix_rank_t;
|
||||
#define PMIX_SERVER_HOSTNAME "pmix.srvr.host" // (char*) node where target server is located
|
||||
#define PMIX_CONNECT_MAX_RETRIES "pmix.tool.mretries" // (uint32_t) maximum number of times to try to connect to server
|
||||
#define PMIX_CONNECT_RETRY_DELAY "pmix.tool.retry" // (uint32_t) time in seconds between connection attempts
|
||||
|
||||
#define PMIX_TOOL_DO_NOT_CONNECT "pmix.tool.nocon" // (bool) the tool wants to use internal PMIx support, but does
|
||||
// not want to connect to a PMIx server
|
||||
|
||||
/* identification attributes */
|
||||
#define PMIX_USERID "pmix.euid" // (uint32_t) effective user id
|
||||
|
@ -298,7 +298,7 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer)
|
||||
/* if we are a client, and we haven't already registered a handler of this
|
||||
* type with our server, or if we have directives, then we need to notify
|
||||
* the server */
|
||||
if (!PMIX_PROC_IS_SERVER &&
|
||||
if (!PMIX_PROC_IS_SERVER && pmix_globals.connected &&
|
||||
(need_register || 0 < pmix_list_get_size(xfer))) {
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: _add_hdlr sending to server");
|
||||
@ -821,9 +821,9 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata)
|
||||
/* need to acquire the object from its originating thread */
|
||||
PMIX_ACQUIRE_OBJECT(cd);
|
||||
|
||||
/* if I am not the server, then I need to notify the server
|
||||
* to remove my registration */
|
||||
if (!PMIX_PROC_IS_SERVER) {
|
||||
/* if I am not the server, and I am connected, then I need
|
||||
* to notify the server to remove my registration */
|
||||
if (!PMIX_PROC_IS_SERVER && pmix_globals.connected) {
|
||||
msg = PMIX_NEW(pmix_buffer_t);
|
||||
PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver,
|
||||
msg, &cmd, 1, PMIX_COMMAND);
|
||||
|
@ -2071,9 +2071,7 @@ static pmix_status_t dstore_init(pmix_info_t info[], size_t ninfo)
|
||||
/* for clients */
|
||||
else {
|
||||
if (NULL == (dstor_tmpdir = getenv(PMIX_DSTORE_ESH_BASE_PATH))){
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto err_exit;
|
||||
return PMIX_ERR_NOT_AVAILABLE; // simply disqualify ourselves
|
||||
}
|
||||
if (NULL == (_base_path = strdup(dstor_tmpdir))) {
|
||||
rc = PMIX_ERR_OUT_OF_RESOURCE;
|
||||
|
@ -188,10 +188,18 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
|
||||
if (NULL != info) {
|
||||
for (n=0; n < ninfo; n++) {
|
||||
if (0 == strcmp(info[n].key, PMIX_CONNECT_TO_SYSTEM)) {
|
||||
system_level_only = true;
|
||||
if (PMIX_UNDEF == info[n].value.type) {
|
||||
system_level_only = true;
|
||||
} else {
|
||||
system_level_only = info[n].value.data.flag;
|
||||
}
|
||||
} else if (0 == strcmp(info[n].key, PMIX_CONNECT_SYSTEM_FIRST)) {
|
||||
/* try the system-level */
|
||||
system_level = true;
|
||||
if (PMIX_UNDEF == info[n].value.type) {
|
||||
system_level = true;
|
||||
} else {
|
||||
system_level = info[n].value.data.flag;
|
||||
}
|
||||
} else if (0 == strcmp(info[n].key, PMIX_SERVER_PIDINFO)) {
|
||||
mca_ptl_tcp_component.tool_pid = info[n].value.data.pid;
|
||||
} else if (0 == strcmp(info[n].key, PMIX_SERVER_URI)) {
|
||||
|
@ -164,7 +164,9 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
|
||||
pmix_kval_t *kptr;
|
||||
pmix_status_t rc;
|
||||
char hostname[PMIX_MAX_NSLEN];
|
||||
bool found;
|
||||
bool found, do_not_connect = false;
|
||||
bool nspace_given = false;
|
||||
bool rank_given = false;
|
||||
pmix_info_t ginfo;
|
||||
size_t n;
|
||||
|
||||
@ -252,7 +254,18 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
|
||||
if (0 == strncmp(info[n].key, PMIX_GDS_MODULE, PMIX_MAX_KEYLEN)) {
|
||||
PMIX_INFO_LOAD(&ginfo, PMIX_GDS_MODULE, info[n].value.data.string, PMIX_STRING);
|
||||
found = true;
|
||||
break;
|
||||
} else if (0 == strncmp(info[n].key, PMIX_TOOL_DO_NOT_CONNECT, PMIX_MAX_KEYLEN)) {
|
||||
if (PMIX_UNDEF == info[n].value.type) {
|
||||
do_not_connect = true;
|
||||
} else {
|
||||
do_not_connect = info[n].value.data.flag;
|
||||
}
|
||||
} else if (0 == strncmp(info[n].key, PMIX_TOOL_NSPACE, PMIX_MAX_KEYLEN)) {
|
||||
(void)strncpy(pmix_globals.myid.nspace, info[n].value.data.string, PMIX_MAX_NSLEN);
|
||||
nspace_given = true;
|
||||
} else if (0 == strncmp(info[n].key, PMIX_TOOL_RANK, PMIX_MAX_KEYLEN)) {
|
||||
pmix_globals.myid.rank = info[n].value.data.rank;
|
||||
rank_given = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -267,19 +280,29 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
|
||||
}
|
||||
PMIX_INFO_DESTRUCT(&ginfo);
|
||||
|
||||
/* connect to the server - returns job info if successful */
|
||||
rc = pmix_ptl_base_connect_to_peer((struct pmix_peer_t*)pmix_client_globals.myserver, info, ninfo);
|
||||
if (PMIX_SUCCESS != rc){
|
||||
PMIX_RELEASE_THREAD(&pmix_global_lock);
|
||||
return rc;
|
||||
if (do_not_connect) {
|
||||
/* ensure we mark that we are not connected */
|
||||
pmix_globals.connected = false;
|
||||
/* it is an error if we were not given an nspace/rank */
|
||||
if (!nspace_given || !rank_given) {
|
||||
PMIX_RELEASE_THREAD(&pmix_global_lock);
|
||||
return PMIX_ERR_INIT;
|
||||
}
|
||||
} else {
|
||||
/* connect to the server - returns job info if successful */
|
||||
rc = pmix_ptl_base_connect_to_peer((struct pmix_peer_t*)pmix_client_globals.myserver, info, ninfo);
|
||||
if (PMIX_SUCCESS != rc){
|
||||
PMIX_RELEASE_THREAD(&pmix_global_lock);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
/* Success, so copy the nspace and rank */
|
||||
(void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
|
||||
proc->rank = pmix_globals.myid.rank;
|
||||
|
||||
/* increment our init reference counter */
|
||||
pmix_globals.init_cntr++;
|
||||
|
||||
/* Success, so copy the nspace and rank */
|
||||
(void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
|
||||
proc->rank = pmix_globals.myid.rank;
|
||||
|
||||
/* now finish the initialization by filling our local
|
||||
* datastore with typical job-related info. No point
|
||||
@ -642,6 +665,11 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void)
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
pmix_globals.init_cntr = 0;
|
||||
/* if we are not connected, then we are done */
|
||||
if (!pmix_globals.connected) {
|
||||
PMIX_RELEASE_THREAD(&pmix_global_lock);
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
PMIX_RELEASE_THREAD(&pmix_global_lock);
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
|
@ -197,7 +197,7 @@ int pmix2x_tool_init(opal_list_t *info)
|
||||
opal_value_t *val;
|
||||
pmix_status_t rc;
|
||||
int ret;
|
||||
opal_process_name_t pname;
|
||||
opal_process_name_t pname = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID};
|
||||
opal_pmix2x_event_t *event;
|
||||
|
||||
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
||||
@ -213,6 +213,14 @@ int pmix2x_tool_init(opal_list_t *info)
|
||||
(void)strncpy(pinfo[n].key, val->key, PMIX_MAX_KEYLEN);
|
||||
pmix2x_value_load(&pinfo[n].value, val);
|
||||
++n;
|
||||
/* check to see if our name is being given from above */
|
||||
if (0 == strcmp(val->key, OPAL_PMIX_TOOL_NSPACE)) {
|
||||
opal_convert_string_to_jobid(&pname.jobid, val->data.string);
|
||||
(void)strncpy(my_proc.nspace, val->data.string, PMIX_MAX_NSLEN);
|
||||
} else if (0 == strcmp(val->key, OPAL_PMIX_TOOL_RANK)) {
|
||||
pname.vpid = val->data.name.vpid;
|
||||
my_proc.rank = pname.vpid;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
pinfo = NULL;
|
||||
@ -237,16 +245,19 @@ int pmix2x_tool_init(opal_list_t *info)
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
/* store our jobid and rank */
|
||||
if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) {
|
||||
/* if we were launched by the OMPI RTE, then
|
||||
* the jobid is in a special format - so get it */
|
||||
mca_pmix_pmix2x_component.native_launch = true;
|
||||
opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace);
|
||||
} else {
|
||||
/* we were launched by someone else, so make the
|
||||
* jobid just be the hash of the nspace */
|
||||
OPAL_HASH_JOBID(my_proc.nspace, pname.jobid);
|
||||
if (OPAL_JOBID_INVALID == pname.jobid) {
|
||||
/* store our jobid and rank */
|
||||
if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) {
|
||||
/* if we were launched by the OMPI RTE, then
|
||||
* the jobid is in a special format - so get it */
|
||||
mca_pmix_pmix2x_component.native_launch = true;
|
||||
opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace);
|
||||
} else {
|
||||
/* we were launched by someone else, so make the
|
||||
* jobid just be the hash of the nspace */
|
||||
OPAL_HASH_JOBID(my_proc.nspace, pname.jobid);
|
||||
}
|
||||
pname.vpid = pmix2x_convert_rank(my_proc.rank);
|
||||
}
|
||||
/* insert this into our list of jobids - it will be the
|
||||
* first, and so we'll check it first */
|
||||
@ -255,7 +266,6 @@ int pmix2x_tool_init(opal_list_t *info)
|
||||
job->jobid = pname.jobid;
|
||||
opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super);
|
||||
|
||||
pname.vpid = pmix2x_convert_rank(my_proc.rank);
|
||||
opal_proc_set_name(&pname);
|
||||
|
||||
/* release the thread in case the event handler fires when
|
||||
|
@ -52,20 +52,29 @@ BEGIN_C_DECLS
|
||||
// accept tool connection requests
|
||||
#define OPAL_PMIX_SERVER_SYSTEM_SUPPORT "pmix.srvr.sys" // (bool) The host RM wants to declare itself as being the local
|
||||
// system server for PMIx connection requests
|
||||
#define OPAL_PMIX_SERVER_PIDINFO "pmix.srvr.pidinfo" // (pid_t) pid of the target server
|
||||
#define OPAL_PMIX_SERVER_TMPDIR "pmix.srvr.tmpdir" // (char*) temp directory where PMIx server will place
|
||||
// client rendezvous points
|
||||
#define OPAL_PMIX_SYSTEM_TMPDIR "pmix.sys.tmpdir" // (char*) temp directory where PMIx server will place
|
||||
// tool rendezvous points
|
||||
#define OPAL_PMIX_CONNECT_TO_SYSTEM "pmix.cnct.sys" // (bool) The requestor requires that a connection be made only to
|
||||
// a local system-level PMIx server
|
||||
#define OPAL_PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) Preferentially look for a system-level PMIx server first
|
||||
#define OPAL_PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data
|
||||
#define OPAL_PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server
|
||||
#define OPAL_PMIX_SERVER_NSPACE "pmix.srv.nspace" // (char*) Name of the nspace to use for this server
|
||||
#define OPAL_PMIX_SERVER_RANK "pmix.srv.rank" // (uint32_t) Rank of this server
|
||||
|
||||
/* tool-related attributes */
|
||||
#define OPAL_PMIX_TOOL_NSPACE "pmix.tool.nspace" // (char*) Name of the nspace to use for this tool
|
||||
#define OPAL_PMIX_TOOL_RANK "pmix.tool.rank" // (uint32_t) Rank of this tool
|
||||
#define OPAL_PMIX_SERVER_PIDINFO "pmix.srvr.pidinfo" // (pid_t) pid of the target server for a tool
|
||||
#define OPAL_PMIX_CONNECT_TO_SYSTEM "pmix.cnct.sys" // (bool) The requestor requires that a connection be made only to
|
||||
// a local system-level PMIx server
|
||||
#define OPAL_PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) Preferentially look for a system-level PMIx server first
|
||||
#define OPAL_PMIX_SERVER_URI "pmix.srvr.uri" // (char*) URI of server to be contacted
|
||||
#define OPAL_PMIX_SERVER_HOSTNAME "pmix.srvr.host" // (char*) node where target server is located
|
||||
#define OPAL_PMIX_CONNECT_MAX_RETRIES "pmix.tool.mretries" // (uint32_t) maximum number of times to try to connect to server
|
||||
#define OPAL_PMIX_CONNECT_RETRY_DELAY "pmix.tool.retry" // (uint32_t) time in seconds between connection attempts
|
||||
#define OPAL_PMIX_TOOL_DO_NOT_CONNECT "pmix.tool.nocon" // (bool) the tool wants to use internal PMIx support, but does
|
||||
// not want to connect to a PMIx server
|
||||
|
||||
|
||||
/* identification attributes */
|
||||
#define OPAL_PMIX_USERID "pmix.euid" // (uint32_t) effective user id
|
||||
|
@ -87,53 +87,77 @@ int orte_ess_base_tool_setup(void)
|
||||
/* set the event base */
|
||||
opal_pmix_base_set_evbase(orte_event_base);
|
||||
|
||||
/* we have to define our name here */
|
||||
if (NULL != orte_ess_base_jobid &&
|
||||
NULL != orte_ess_base_vpid) {
|
||||
opal_output_verbose(2, orte_ess_base_framework.framework_output,
|
||||
"ess:tool:obtaining name from environment");
|
||||
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) {
|
||||
return(ret);
|
||||
}
|
||||
ORTE_PROC_MY_NAME->jobid = jobid;
|
||||
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) {
|
||||
return(ret);
|
||||
}
|
||||
ORTE_PROC_MY_NAME->vpid = vpid;
|
||||
} else {
|
||||
/* If we are a tool with no name, then define it here */
|
||||
uint16_t jobfam;
|
||||
uint32_t hash32;
|
||||
uint32_t bias;
|
||||
|
||||
opal_output_verbose(2, orte_ess_base_framework.framework_output,
|
||||
"ess:tool:computing name");
|
||||
/* hash the nodename */
|
||||
OPAL_HASH_STR(orte_process_info.nodename, hash32);
|
||||
bias = (uint32_t)orte_process_info.pid;
|
||||
/* fold in the bias */
|
||||
hash32 = hash32 ^ bias;
|
||||
|
||||
/* now compress to 16-bits */
|
||||
jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32));
|
||||
|
||||
/* set the name */
|
||||
ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16);
|
||||
ORTE_PROC_MY_NAME->vpid = 0;
|
||||
}
|
||||
/* my name is set, xfer it to the OPAL layer */
|
||||
orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
|
||||
|
||||
/* initialize - PMIx may set our name here if we attach to
|
||||
* a PMIx server */
|
||||
if (NULL != opal_pmix.tool_init) {
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix.tool_init(NULL))) {
|
||||
opal_list_t info;
|
||||
opal_value_t *kv;
|
||||
OBJ_CONSTRUCT(&info, opal_list_t);
|
||||
/* pass our name so the PMIx layer can use it */
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_TOOL_NSPACE);
|
||||
orte_util_convert_jobid_to_string(&kv->data.string, ORTE_PROC_MY_NAME->jobid);
|
||||
kv->type = OPAL_STRING;
|
||||
opal_list_append(&info, &kv->super);
|
||||
/* ditto for our rank */
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_TOOL_RANK);
|
||||
kv->data.name.vpid = ORTE_PROC_MY_NAME->vpid;
|
||||
kv->type = OPAL_VPID;
|
||||
opal_list_append(&info, &kv->super);
|
||||
/* ORTE tools don't need to connect to a PMIx server as
|
||||
* they will connect via the OOB */
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_TOOL_DO_NOT_CONNECT);
|
||||
kv->data.flag = true;
|
||||
kv->type = OPAL_BOOL;
|
||||
opal_list_append(&info, &kv->super);
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix.tool_init(&info))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "opal_pmix.init";
|
||||
OPAL_LIST_DESTRUCT(&info);
|
||||
goto error;
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&info);
|
||||
ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
|
||||
ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;
|
||||
} else {
|
||||
/* we have to define our name here */
|
||||
if (NULL != orte_ess_base_jobid &&
|
||||
NULL != orte_ess_base_vpid) {
|
||||
opal_output_verbose(2, orte_ess_base_framework.framework_output,
|
||||
"ess:tool:obtaining name from environment");
|
||||
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) {
|
||||
return(ret);
|
||||
}
|
||||
ORTE_PROC_MY_NAME->jobid = jobid;
|
||||
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) {
|
||||
return(ret);
|
||||
}
|
||||
ORTE_PROC_MY_NAME->vpid = vpid;
|
||||
} else {
|
||||
/* If we are a tool with no name, then define it here */
|
||||
uint16_t jobfam;
|
||||
uint32_t hash32;
|
||||
uint32_t bias;
|
||||
|
||||
opal_output_verbose(2, orte_ess_base_framework.framework_output,
|
||||
"ess:tool:computing name");
|
||||
/* hash the nodename */
|
||||
OPAL_HASH_STR(orte_process_info.nodename, hash32);
|
||||
bias = (uint32_t)orte_process_info.pid;
|
||||
/* fold in the bias */
|
||||
hash32 = hash32 ^ bias;
|
||||
|
||||
/* now compress to 16-bits */
|
||||
jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32));
|
||||
|
||||
/* set the name */
|
||||
ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16);
|
||||
ORTE_PROC_MY_NAME->vpid = 0;
|
||||
}
|
||||
/* my name is set, xfer it to the OPAL layer */
|
||||
orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
|
||||
}
|
||||
orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename);
|
||||
orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;
|
||||
|
@ -577,6 +577,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
if (NULL == (jdata = orte_get_job_data_object(job))) {
|
||||
/* we can safely ignore this request as the job
|
||||
* was already cleaned up */
|
||||
opal_output(0, "NULL JOB");
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
@ -584,6 +585,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
* can ignore this request as we would have already
|
||||
* dealt with it */
|
||||
if (0 < jdata->num_local_procs) {
|
||||
opal_output(0, "NO PROCS");
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
@ -620,6 +622,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
OBJ_RELEASE(map);
|
||||
jdata->map = NULL;
|
||||
}
|
||||
opal_output(0, "CLEANUP COMPLETE");
|
||||
break;
|
||||
|
||||
|
||||
|
@ -548,7 +548,7 @@ int orte_submit_init(int argc, char *argv[],
|
||||
OBJ_CONSTRUCT(&val, opal_value_t);
|
||||
val.key = OPAL_PMIX_PROC_URI;
|
||||
val.type = OPAL_STRING;
|
||||
val.data.string = orte_process_info.my_daemon_uri;
|
||||
val.data.string = orte_process_info.my_hnp_uri;
|
||||
if (OPAL_SUCCESS != opal_pmix.store_local(ORTE_PROC_MY_HNP, &val)) {
|
||||
val.key = NULL;
|
||||
val.data.string = NULL;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user