1
1

Fix the orte-dvm operations so that orterun can connect and execute an application. There is a lingering problem, though. The first invocation of orterun succeeds every time. However, subsequent invocations have a high probability of hanging in the OOB connection handshake.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-08-23 17:28:49 -07:00
родитель 2e23fba5c4
Коммит 68029b27e4
10 изменённых файлов: 159 добавлений и 75 удалений

Просмотреть файл

@ -124,12 +124,15 @@ typedef uint32_t pmix_rank_t;
// client rendezvous points and contact info
#define PMIX_SYSTEM_TMPDIR "pmix.sys.tmpdir" // (char*) temp directory for this system, where PMIx
// server will place tool rendezvous points and contact info
#define PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data
#define PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server
#define PMIX_SERVER_NSPACE "pmix.srv.nspace" // (char*) Name of the nspace to use for this server
#define PMIX_SERVER_RANK "pmix.srv.rank" // (pmix_rank_t) Rank of this server
/* tool-related attributes */
#define PMIX_TOOL_NSPACE "pmix.tool.nspace" // (char*) Name of the nspace to use for this tool
#define PMIX_TOOL_RANK "pmix.tool.rank" // (uint32_t) Rank of this tool
#define PMIX_SERVER_PIDINFO "pmix.srvr.pidinfo" // (pid_t) pid of the target server for a tool
#define PMIX_CONNECT_TO_SYSTEM "pmix.cnct.sys" // (bool) The requestor requires that a connection be made only to
// a local system-level PMIx server
@ -138,7 +141,8 @@ typedef uint32_t pmix_rank_t;
#define PMIX_SERVER_HOSTNAME "pmix.srvr.host" // (char*) node where target server is located
#define PMIX_CONNECT_MAX_RETRIES "pmix.tool.mretries" // (uint32_t) maximum number of times to try to connect to server
#define PMIX_CONNECT_RETRY_DELAY "pmix.tool.retry" // (uint32_t) time in seconds between connection attempts
#define PMIX_TOOL_DO_NOT_CONNECT "pmix.tool.nocon" // (bool) the tool wants to use internal PMIx support, but does
// not want to connect to a PMIx server
/* identification attributes */
#define PMIX_USERID "pmix.euid" // (uint32_t) effective user id

Просмотреть файл

@ -298,7 +298,7 @@ static pmix_status_t _add_hdlr(pmix_rshift_caddy_t *cd, pmix_list_t *xfer)
/* if we are a client, and we haven't already registered a handler of this
* type with our server, or if we have directives, then we need to notify
* the server */
if (!PMIX_PROC_IS_SERVER &&
if (!PMIX_PROC_IS_SERVER && pmix_globals.connected &&
(need_register || 0 < pmix_list_get_size(xfer))) {
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: _add_hdlr sending to server");
@ -821,9 +821,9 @@ static void dereg_event_hdlr(int sd, short args, void *cbdata)
/* need to acquire the object from its originating thread */
PMIX_ACQUIRE_OBJECT(cd);
/* if I am not the server, then I need to notify the server
* to remove my registration */
if (!PMIX_PROC_IS_SERVER) {
/* if I am not the server, and I am connected, then I need
* to notify the server to remove my registration */
if (!PMIX_PROC_IS_SERVER && pmix_globals.connected) {
msg = PMIX_NEW(pmix_buffer_t);
PMIX_BFROPS_PACK(rc, pmix_client_globals.myserver,
msg, &cmd, 1, PMIX_COMMAND);

Просмотреть файл

@ -2071,9 +2071,7 @@ static pmix_status_t dstore_init(pmix_info_t info[], size_t ninfo)
/* for clients */
else {
if (NULL == (dstor_tmpdir = getenv(PMIX_DSTORE_ESH_BASE_PATH))){
rc = PMIX_ERR_BAD_PARAM;
PMIX_ERROR_LOG(rc);
goto err_exit;
return PMIX_ERR_NOT_AVAILABLE; // simply disqualify ourselves
}
if (NULL == (_base_path = strdup(dstor_tmpdir))) {
rc = PMIX_ERR_OUT_OF_RESOURCE;

Просмотреть файл

@ -188,10 +188,18 @@ static pmix_status_t connect_to_peer(struct pmix_peer_t *peer,
if (NULL != info) {
for (n=0; n < ninfo; n++) {
if (0 == strcmp(info[n].key, PMIX_CONNECT_TO_SYSTEM)) {
system_level_only = true;
if (PMIX_UNDEF == info[n].value.type) {
system_level_only = true;
} else {
system_level_only = info[n].value.data.flag;
}
} else if (0 == strcmp(info[n].key, PMIX_CONNECT_SYSTEM_FIRST)) {
/* try the system-level */
system_level = true;
if (PMIX_UNDEF == info[n].value.type) {
system_level = true;
} else {
system_level = info[n].value.data.flag;
}
} else if (0 == strcmp(info[n].key, PMIX_SERVER_PIDINFO)) {
mca_ptl_tcp_component.tool_pid = info[n].value.data.pid;
} else if (0 == strcmp(info[n].key, PMIX_SERVER_URI)) {

Просмотреть файл

@ -164,7 +164,9 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
pmix_kval_t *kptr;
pmix_status_t rc;
char hostname[PMIX_MAX_NSLEN];
bool found;
bool found, do_not_connect = false;
bool nspace_given = false;
bool rank_given = false;
pmix_info_t ginfo;
size_t n;
@ -252,7 +254,18 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
if (0 == strncmp(info[n].key, PMIX_GDS_MODULE, PMIX_MAX_KEYLEN)) {
PMIX_INFO_LOAD(&ginfo, PMIX_GDS_MODULE, info[n].value.data.string, PMIX_STRING);
found = true;
break;
} else if (0 == strncmp(info[n].key, PMIX_TOOL_DO_NOT_CONNECT, PMIX_MAX_KEYLEN)) {
if (PMIX_UNDEF == info[n].value.type) {
do_not_connect = true;
} else {
do_not_connect = info[n].value.data.flag;
}
} else if (0 == strncmp(info[n].key, PMIX_TOOL_NSPACE, PMIX_MAX_KEYLEN)) {
(void)strncpy(pmix_globals.myid.nspace, info[n].value.data.string, PMIX_MAX_NSLEN);
nspace_given = true;
} else if (0 == strncmp(info[n].key, PMIX_TOOL_RANK, PMIX_MAX_KEYLEN)) {
pmix_globals.myid.rank = info[n].value.data.rank;
rank_given = true;
}
}
}
@ -267,19 +280,29 @@ PMIX_EXPORT int PMIx_tool_init(pmix_proc_t *proc,
}
PMIX_INFO_DESTRUCT(&ginfo);
/* connect to the server - returns job info if successful */
rc = pmix_ptl_base_connect_to_peer((struct pmix_peer_t*)pmix_client_globals.myserver, info, ninfo);
if (PMIX_SUCCESS != rc){
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
if (do_not_connect) {
/* ensure we mark that we are not connected */
pmix_globals.connected = false;
/* it is an error if we were not given an nspace/rank */
if (!nspace_given || !rank_given) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_INIT;
}
} else {
/* connect to the server - returns job info if successful */
rc = pmix_ptl_base_connect_to_peer((struct pmix_peer_t*)pmix_client_globals.myserver, info, ninfo);
if (PMIX_SUCCESS != rc){
PMIX_RELEASE_THREAD(&pmix_global_lock);
return rc;
}
}
/* Success, so copy the nspace and rank */
(void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
proc->rank = pmix_globals.myid.rank;
/* increment our init reference counter */
pmix_globals.init_cntr++;
/* Success, so copy the nspace and rank */
(void)strncpy(proc->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN);
proc->rank = pmix_globals.myid.rank;
/* now finish the initialization by filling our local
* datastore with typical job-related info. No point
@ -642,6 +665,11 @@ PMIX_EXPORT pmix_status_t PMIx_tool_finalize(void)
return PMIX_SUCCESS;
}
pmix_globals.init_cntr = 0;
/* if we are not connected, then we are done */
if (!pmix_globals.connected) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_SUCCESS;
}
PMIX_RELEASE_THREAD(&pmix_global_lock);
pmix_output_verbose(2, pmix_globals.debug_output,

Просмотреть файл

@ -197,7 +197,7 @@ int pmix2x_tool_init(opal_list_t *info)
opal_value_t *val;
pmix_status_t rc;
int ret;
opal_process_name_t pname;
opal_process_name_t pname = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID};
opal_pmix2x_event_t *event;
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
@ -213,6 +213,14 @@ int pmix2x_tool_init(opal_list_t *info)
(void)strncpy(pinfo[n].key, val->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&pinfo[n].value, val);
++n;
/* check to see if our name is being given from above */
if (0 == strcmp(val->key, OPAL_PMIX_TOOL_NSPACE)) {
opal_convert_string_to_jobid(&pname.jobid, val->data.string);
(void)strncpy(my_proc.nspace, val->data.string, PMIX_MAX_NSLEN);
} else if (0 == strcmp(val->key, OPAL_PMIX_TOOL_RANK)) {
pname.vpid = val->data.name.vpid;
my_proc.rank = pname.vpid;
}
}
} else {
pinfo = NULL;
@ -237,16 +245,19 @@ int pmix2x_tool_init(opal_list_t *info)
return OPAL_SUCCESS;
}
/* store our jobid and rank */
if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) {
/* if we were launched by the OMPI RTE, then
* the jobid is in a special format - so get it */
mca_pmix_pmix2x_component.native_launch = true;
opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace);
} else {
/* we were launched by someone else, so make the
* jobid just be the hash of the nspace */
OPAL_HASH_JOBID(my_proc.nspace, pname.jobid);
if (OPAL_JOBID_INVALID == pname.jobid) {
/* store our jobid and rank */
if (NULL != getenv(OPAL_MCA_PREFIX"orte_launch")) {
/* if we were launched by the OMPI RTE, then
* the jobid is in a special format - so get it */
mca_pmix_pmix2x_component.native_launch = true;
opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace);
} else {
/* we were launched by someone else, so make the
* jobid just be the hash of the nspace */
OPAL_HASH_JOBID(my_proc.nspace, pname.jobid);
}
pname.vpid = pmix2x_convert_rank(my_proc.rank);
}
/* insert this into our list of jobids - it will be the
* first, and so we'll check it first */
@ -255,7 +266,6 @@ int pmix2x_tool_init(opal_list_t *info)
job->jobid = pname.jobid;
opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super);
pname.vpid = pmix2x_convert_rank(my_proc.rank);
opal_proc_set_name(&pname);
/* release the thread in case the event handler fires when

Просмотреть файл

@ -52,20 +52,29 @@ BEGIN_C_DECLS
// accept tool connection requests
#define OPAL_PMIX_SERVER_SYSTEM_SUPPORT "pmix.srvr.sys" // (bool) The host RM wants to declare itself as being the local
// system server for PMIx connection requests
#define OPAL_PMIX_SERVER_PIDINFO "pmix.srvr.pidinfo" // (pid_t) pid of the target server
#define OPAL_PMIX_SERVER_TMPDIR "pmix.srvr.tmpdir" // (char*) temp directory where PMIx server will place
// client rendezvous points
#define OPAL_PMIX_SYSTEM_TMPDIR "pmix.sys.tmpdir" // (char*) temp directory where PMIx server will place
// tool rendezvous points
#define OPAL_PMIX_CONNECT_TO_SYSTEM "pmix.cnct.sys" // (bool) The requestor requires that a connection be made only to
// a local system-level PMIx server
#define OPAL_PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) Preferentially look for a system-level PMIx server first
#define OPAL_PMIX_REGISTER_NODATA "pmix.reg.nodata" // (bool) Registration is for nspace only, do not copy job data
#define OPAL_PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor" // (bool) Enable PMIx internal monitoring by server
#define OPAL_PMIX_SERVER_NSPACE "pmix.srv.nspace" // (char*) Name of the nspace to use for this server
#define OPAL_PMIX_SERVER_RANK "pmix.srv.rank" // (uint32_t) Rank of this server
/* tool-related attributes */
#define OPAL_PMIX_TOOL_NSPACE "pmix.tool.nspace" // (char*) Name of the nspace to use for this tool
#define OPAL_PMIX_TOOL_RANK "pmix.tool.rank" // (uint32_t) Rank of this tool
#define OPAL_PMIX_SERVER_PIDINFO "pmix.srvr.pidinfo" // (pid_t) pid of the target server for a tool
#define OPAL_PMIX_CONNECT_TO_SYSTEM "pmix.cnct.sys" // (bool) The requestor requires that a connection be made only to
// a local system-level PMIx server
#define OPAL_PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first" // (bool) Preferentially look for a system-level PMIx server first
#define OPAL_PMIX_SERVER_URI "pmix.srvr.uri" // (char*) URI of server to be contacted
#define OPAL_PMIX_SERVER_HOSTNAME "pmix.srvr.host" // (char*) node where target server is located
#define OPAL_PMIX_CONNECT_MAX_RETRIES "pmix.tool.mretries" // (uint32_t) maximum number of times to try to connect to server
#define OPAL_PMIX_CONNECT_RETRY_DELAY "pmix.tool.retry" // (uint32_t) time in seconds between connection attempts
#define OPAL_PMIX_TOOL_DO_NOT_CONNECT "pmix.tool.nocon" // (bool) the tool wants to use internal PMIx support, but does
// not want to connect to a PMIx server
/* identification attributes */
#define OPAL_PMIX_USERID "pmix.euid" // (uint32_t) effective user id

Просмотреть файл

@ -87,53 +87,77 @@ int orte_ess_base_tool_setup(void)
/* set the event base */
opal_pmix_base_set_evbase(orte_event_base);
/* we have to define our name here */
if (NULL != orte_ess_base_jobid &&
NULL != orte_ess_base_vpid) {
opal_output_verbose(2, orte_ess_base_framework.framework_output,
"ess:tool:obtaining name from environment");
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) {
return(ret);
}
ORTE_PROC_MY_NAME->jobid = jobid;
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) {
return(ret);
}
ORTE_PROC_MY_NAME->vpid = vpid;
} else {
/* If we are a tool with no name, then define it here */
uint16_t jobfam;
uint32_t hash32;
uint32_t bias;
opal_output_verbose(2, orte_ess_base_framework.framework_output,
"ess:tool:computing name");
/* hash the nodename */
OPAL_HASH_STR(orte_process_info.nodename, hash32);
bias = (uint32_t)orte_process_info.pid;
/* fold in the bias */
hash32 = hash32 ^ bias;
/* now compress to 16-bits */
jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32));
/* set the name */
ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16);
ORTE_PROC_MY_NAME->vpid = 0;
}
/* my name is set, xfer it to the OPAL layer */
orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
/* initialize - PMIx may set our name here if we attach to
* a PMIx server */
if (NULL != opal_pmix.tool_init) {
if (OPAL_SUCCESS != (ret = opal_pmix.tool_init(NULL))) {
opal_list_t info;
opal_value_t *kv;
OBJ_CONSTRUCT(&info, opal_list_t);
/* pass our name so the PMIx layer can use it */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_TOOL_NSPACE);
orte_util_convert_jobid_to_string(&kv->data.string, ORTE_PROC_MY_NAME->jobid);
kv->type = OPAL_STRING;
opal_list_append(&info, &kv->super);
/* ditto for our rank */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_TOOL_RANK);
kv->data.name.vpid = ORTE_PROC_MY_NAME->vpid;
kv->type = OPAL_VPID;
opal_list_append(&info, &kv->super);
/* ORTE tools don't need to connect to a PMIx server as
* they will connect via the OOB */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_TOOL_DO_NOT_CONNECT);
kv->data.flag = true;
kv->type = OPAL_BOOL;
opal_list_append(&info, &kv->super);
if (OPAL_SUCCESS != (ret = opal_pmix.tool_init(&info))) {
ORTE_ERROR_LOG(ret);
error = "opal_pmix.init";
OPAL_LIST_DESTRUCT(&info);
goto error;
}
OPAL_LIST_DESTRUCT(&info);
ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;
} else {
/* we have to define our name here */
if (NULL != orte_ess_base_jobid &&
NULL != orte_ess_base_vpid) {
opal_output_verbose(2, orte_ess_base_framework.framework_output,
"ess:tool:obtaining name from environment");
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) {
return(ret);
}
ORTE_PROC_MY_NAME->jobid = jobid;
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) {
return(ret);
}
ORTE_PROC_MY_NAME->vpid = vpid;
} else {
/* If we are a tool with no name, then define it here */
uint16_t jobfam;
uint32_t hash32;
uint32_t bias;
opal_output_verbose(2, orte_ess_base_framework.framework_output,
"ess:tool:computing name");
/* hash the nodename */
OPAL_HASH_STR(orte_process_info.nodename, hash32);
bias = (uint32_t)orte_process_info.pid;
/* fold in the bias */
hash32 = hash32 ^ bias;
/* now compress to 16-bits */
jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32));
/* set the name */
ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16);
ORTE_PROC_MY_NAME->vpid = 0;
}
/* my name is set, xfer it to the OPAL layer */
orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
}
orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename);
orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;

Просмотреть файл

@ -577,6 +577,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
if (NULL == (jdata = orte_get_job_data_object(job))) {
/* we can safely ignore this request as the job
* was already cleaned up */
opal_output(0, "NULL JOB");
goto CLEANUP;
}
@ -584,6 +585,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
* can ignore this request as we would have already
* dealt with it */
if (0 < jdata->num_local_procs) {
opal_output(0, "NO PROCS");
goto CLEANUP;
}
@ -620,6 +622,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
OBJ_RELEASE(map);
jdata->map = NULL;
}
opal_output(0, "CLEANUP COMPLETE");
break;

Просмотреть файл

@ -548,7 +548,7 @@ int orte_submit_init(int argc, char *argv[],
OBJ_CONSTRUCT(&val, opal_value_t);
val.key = OPAL_PMIX_PROC_URI;
val.type = OPAL_STRING;
val.data.string = orte_process_info.my_daemon_uri;
val.data.string = orte_process_info.my_hnp_uri;
if (OPAL_SUCCESS != opal_pmix.store_local(ORTE_PROC_MY_HNP, &val)) {
val.key = NULL;
val.data.string = NULL;