1
1
Этот коммит содержится в:
Ralph Castain 2016-07-03 20:45:08 -07:00
родитель 0b915f1bb7
Коммит c9ada8e095
3 изменённых файла: 24 добавления и 31 удаление

Просмотреть файл

@ -648,10 +648,8 @@ static pmix_status_t pmix_server_authenticate(pmix_pending_connection_t *pnd,
pmix_output_verbose(2, pmix_globals.debug_output, pmix_output_verbose(2, pmix_globals.debug_output,
"validation of client credential failed"); "validation of client credential failed");
free(msg); free(msg);
if (NULL != psave) { pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL);
pmix_pointer_array_set_item(&pmix_server_globals.clients, psave->index, NULL); PMIX_RELEASE(psave);
PMIX_RELEASE(psave);
}
/* send an error reply to the client */ /* send an error reply to the client */
goto error; goto error;
} }

47
opal/mca/pmix/pmix2x/pmix/src/tool/pmix_tool.c Обычный файл → Исполняемый файл
Просмотреть файл

@ -218,9 +218,12 @@ pmix_output(0, "TOOL INIT");
if (0 == strcmp(PMIX_EVENT_BASE, info[n].key)) { if (0 == strcmp(PMIX_EVENT_BASE, info[n].key)) {
pmix_globals.evbase = (pmix_event_base_t*)info[n].value.data.ptr; pmix_globals.evbase = (pmix_event_base_t*)info[n].value.data.ptr;
pmix_globals.external_evbase = true; pmix_globals.external_evbase = true;
} else if (strcmp(info[i].key, PMIX_SERVER_PIDINFO) == 0) {
server_pid = info[i].value.data.integer;
} }
} }
} }
/* setup the globals */ /* setup the globals */
pmix_globals_init(); pmix_globals_init();
PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t); PMIX_CONSTRUCT(&pmix_client_globals.pending_requests, pmix_list_t);
@ -254,31 +257,19 @@ pmix_output(0, "TOOL INIT");
} }
} }
/* setup the path to the daemon rendezvous point */ /* setup the path to the daemon rendezvous point */
memset(&address, 0, sizeof(struct sockaddr_un)); memset(&address, 0, sizeof(struct sockaddr_un));
address.sun_family = AF_UNIX; address.sun_family = AF_UNIX;
/* Get first 10 char's of hostname to match what the server is doing */ /* Get first 10 char's of hostname to match what the server is doing */
gethostname(hostname, hostnamelen); gethostname(hostname, hostnamelen);
/* Get the local hostname, and look for a file named
* /tmp/pmix.hostname.tool - this file will contain
* the URI where the server is listening. The URI consists
* of 3 parts - the code below will parse the string read
* from the file and connect accordingly */
for (i = 0; i < (int)ninfo; i++) {
if (strcmp(info[i].key, PMIX_SERVER_PIDINFO) == 0) {
server_pid = info[i].value.data.integer;
break;
}
}
/* if they gave us a specific pid, then look for that /* if they gave us a specific pid, then look for that
* particular server - otherwise, see if there is only * particular server - otherwise, see if there is only
* one on this node and default to it */ * one on this node and default to it */
if (server_pid != -1) { if (server_pid != -1) {
snprintf(address.sun_path, sizeof(address.sun_path)-1, "%s/pmix.%s.%d", tdir, hostname, server_pid); snprintf(address.sun_path, sizeof(address.sun_path)-1, "%s/pmix.%s.%d", tdir, hostname, server_pid);
/* if the rendezvous file doesn't exist, that's an error */ /* if the rendezvous file doesn't exist, that's an error */
if (0 != access(address.sun_path, R_OK)) { if (0 != access(address.sun_path, R_OK)) {
pmix_output_close(pmix_globals.debug_output); pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize(); pmix_output_finalize();
pmix_class_finalize(); pmix_class_finalize();
@ -294,6 +285,7 @@ pmix_output(0, "TOOL INIT");
} }
/* search the entries for something that starts with pmix.hostname */ /* search the entries for something that starts with pmix.hostname */
if (0 > asprintf(&tmp, "pmix.%s", hostname)) { if (0 > asprintf(&tmp, "pmix.%s", hostname)) {
closedir(cur_dirp);
return PMIX_ERR_NOMEM; return PMIX_ERR_NOMEM;
} }
evar = NULL; evar = NULL;
@ -301,6 +293,8 @@ pmix_output(0, "TOOL INIT");
if (0 == strncmp(dir_entry->d_name, tmp, strlen(tmp))) { if (0 == strncmp(dir_entry->d_name, tmp, strlen(tmp))) {
/* found one - if more than one, then that's an error */ /* found one - if more than one, then that's an error */
if (NULL != evar) { if (NULL != evar) {
closedir(cur_dirp);
free(evar);
free(tmp); free(tmp);
pmix_output_close(pmix_globals.debug_output); pmix_output_close(pmix_globals.debug_output);
pmix_output_finalize(); pmix_output_finalize();
@ -833,14 +827,12 @@ static pmix_status_t usock_connect(struct sockaddr_un *addr, int *fd)
"timeout connecting to server"); "timeout connecting to server");
CLOSE_THE_SOCKET(sd); CLOSE_THE_SOCKET(sd);
continue; continue;
} } else if (ECONNABORTED == pmix_socket_errno) {
/* Some kernels (Linux 2.6) will automatically software
/* Some kernels (Linux 2.6) will automatically software abort a connection that was ECONNREFUSED on the last
abort a connection that was ECONNREFUSED on the last attempt, without even trying to establish the
attempt, without even trying to establish the connection. Handle that case in a semi-rational
connection. Handle that case in a semi-rational way by trying twice before giving up */
way by trying twice before giving up */
else if (ECONNABORTED == pmix_socket_errno) {
pmix_output_verbose(2, pmix_globals.debug_output, pmix_output_verbose(2, pmix_globals.debug_output,
"connection to server aborted by OS - retrying"); "connection to server aborted by OS - retrying");
CLOSE_THE_SOCKET(sd); CLOSE_THE_SOCKET(sd);
@ -848,9 +840,10 @@ static pmix_status_t usock_connect(struct sockaddr_un *addr, int *fd)
} else { } else {
pmix_output_verbose(2, pmix_globals.debug_output, pmix_output_verbose(2, pmix_globals.debug_output,
"Failed to connect, errno = %d, err= %s\n", errno, strerror(errno)); "Failed to connect, errno = %d, err= %s\n", errno, strerror(errno));
CLOSE_THE_SOCKET(sd);
continue; continue;
}
} }
}
/* otherwise, the connect succeeded - so break out of the loop */ /* otherwise, the connect succeeded - so break out of the loop */
break; break;
} }

Просмотреть файл

@ -434,6 +434,8 @@ static void _toolconn(int sd, short args, void *cbdata)
/* otherwise, we have to send the request to the HNP. /* otherwise, we have to send the request to the HNP.
* Eventually, when we switch to nspace instead of an * Eventually, when we switch to nspace instead of an
* integer jobid, we'll just locally assign this value */ * integer jobid, we'll just locally assign this value */
tool.jobid = ORTE_JOBID_INVALID;
tool.vpid = ORTE_VPID_INVALID;
if (NULL != cd->toolcbfunc) { if (NULL != cd->toolcbfunc) {
cd->toolcbfunc(ORTE_ERR_NOT_SUPPORTED, tool, cd->cbdata); cd->toolcbfunc(ORTE_ERR_NOT_SUPPORTED, tool, cd->cbdata);
} }