Correctly handle connection terminations during finalize so mpirun doesn't hang. Cleanup some corner cases in the error notification system
Этот коммит содержится в:
родитель
1cdc1c121c
Коммит
0a6b8d2c14
@ -1056,30 +1056,55 @@ static bool match_error_registration(pmix_regevents_info_t *reginfoptr, pmix_not
|
|||||||
size_t ninfo = reginfoptr->ninfo;
|
size_t ninfo = reginfoptr->ninfo;
|
||||||
pmix_status_t error = cd->status;
|
pmix_status_t error = cd->status;
|
||||||
|
|
||||||
|
if (NULL == info || ninfo <= 0) {
|
||||||
|
/* this is a general errhandler, and so it always matches.
|
||||||
|
* however, here we are looking for an exact match, and
|
||||||
|
* so we ignore general errhandlers unless the incoming
|
||||||
|
* one is also general */
|
||||||
|
if (NULL == cd->info || 0 == cd->ninfo) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* since this errhandler has info keys, it is not a general errhandler.
|
||||||
|
* If the incoming errhandler *is* a general one, then we must not
|
||||||
|
* match so we can store the general case */
|
||||||
|
if (NULL == cd->info || 0 == cd->ninfo) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* try to match using error name or error group keys - this indicates
|
||||||
|
* a request for a specific error state */
|
||||||
pmix_get_errorgroup(error, errgroup);
|
pmix_get_errorgroup(error, errgroup);
|
||||||
/* try to match using error name or error group keys */
|
|
||||||
for (i=0; i < ninfo; i++) {
|
for (i=0; i < ninfo; i++) {
|
||||||
// if we get a match on any key then we abort the search and return true.
|
// if we get a match on any key then we abort the search and return true.
|
||||||
if ((0 == strcmp(info[i].key, PMIX_ERROR_NAME)) &&
|
if ((0 == strncmp(info[i].key, PMIX_ERROR_NAME, PMIX_MAX_KEYLEN)) &&
|
||||||
(error == info[i].value.data.int32)) {
|
(error == info[i].value.data.int32)) {
|
||||||
return true;
|
return true;
|
||||||
} else if ((0 == strcmp(info[i].key, errgroup)) &&
|
} else if ((0 == strncmp(info[i].key, errgroup, PMIX_MAX_KEYLEN)) &&
|
||||||
(true == info[i].value.data.flag)) {
|
(true == info[i].value.data.flag)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* search by node (error location) key if it is specified in the notify info list*/
|
|
||||||
for (i=0; i<cd->ninfo ; i++) {
|
/* if we get here, then they haven't asked for a specific error state.
|
||||||
if (0 == strcmp (cd->info[i].key, PMIX_ERROR_NODE_NAME)) {
|
* It is possible, however, that they are asking for all errors from a
|
||||||
for (j=0; j<ninfo; j++) {
|
* specific node, so search by node (error location) key if it is
|
||||||
if ((0 == strcmp (info[j].key, PMIX_ERROR_NODE_NAME)) &&
|
* specified in the notify info list */
|
||||||
(0 == strcmp (info[j].value.data.string, cd->info[i].value.data.string))) {
|
for (i=0; i < cd->ninfo ; i++) {
|
||||||
|
if (0 == strncmp(cd->info[i].key, PMIX_ERROR_NODE_NAME, PMIX_MAX_KEYLEN)) {
|
||||||
|
for (j=0; j < ninfo; j++) {
|
||||||
|
if ((0 == strncmp(info[j].key, PMIX_ERROR_NODE_NAME, PMIX_MAX_KEYLEN)) &&
|
||||||
|
(0 == strcmp(info[j].value.data.string, cd->info[i].value.data.string))) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* end of search return false*/
|
|
||||||
|
/* end of search and nothing matched, so return false */
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1093,9 +1118,11 @@ static void _notify_error(int sd, short args, void *cbdata)
|
|||||||
pmix_peer_t *peer;
|
pmix_peer_t *peer;
|
||||||
pmix_regevents_info_t *reginfoptr;
|
pmix_regevents_info_t *reginfoptr;
|
||||||
bool notify, notifyall;
|
bool notify, notifyall;
|
||||||
|
|
||||||
pmix_output_verbose(0, pmix_globals.debug_output,
|
pmix_output_verbose(0, pmix_globals.debug_output,
|
||||||
"pmix_server: _notify_error notifying client of error %d",
|
"pmix_server: _notify_error notifying client of error %d",
|
||||||
cd->status);
|
cd->status);
|
||||||
|
|
||||||
/* pack the command */
|
/* pack the command */
|
||||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cmd, 1, PMIX_CMD))) {
|
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(cd->buf, &cmd, 1, PMIX_CMD))) {
|
||||||
PMIX_ERROR_LOG(rc);
|
PMIX_ERROR_LOG(rc);
|
||||||
@ -1157,6 +1184,8 @@ static void _notify_error(int sd, short args, void *cbdata)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!notify) {
|
if (!notify) {
|
||||||
|
/* if we are not notifying everyone, and this proc isn't to
|
||||||
|
* be notified, then just continue the main loop */
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1173,8 +1202,9 @@ static void _notify_error(int sd, short args, void *cbdata)
|
|||||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||||
"pmix_server _notify_error - match error registration returned notify =%d ", notify);
|
"pmix_server _notify_error - match error registration returned notify =%d ", notify);
|
||||||
}
|
}
|
||||||
if (notify)
|
if (notify) {
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (notify) {
|
if (notify) {
|
||||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||||
@ -1212,6 +1242,7 @@ pmix_status_t pmix_server_notify_error(pmix_status_t status,
|
|||||||
cd->ninfo = ninfo;
|
cd->ninfo = ninfo;
|
||||||
cd->cbfunc = cbfunc;
|
cd->cbfunc = cbfunc;
|
||||||
cd->cbdata = cbdata;
|
cd->cbdata = cbdata;
|
||||||
|
|
||||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||||
"pmix_server_notify_error status =%d, nprocs = %lu, ninfo =%lu",
|
"pmix_server_notify_error status =%d, nprocs = %lu, ninfo =%lu",
|
||||||
status, nprocs, ninfo);
|
status, nprocs, ninfo);
|
||||||
@ -1227,18 +1258,19 @@ static void reg_errhandler(int sd, short args, void *cbdata)
|
|||||||
int index = 0;
|
int index = 0;
|
||||||
pmix_status_t rc;
|
pmix_status_t rc;
|
||||||
pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata;
|
pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata;
|
||||||
|
|
||||||
/* check if this handler is already registered if so return error */
|
/* check if this handler is already registered if so return error */
|
||||||
if (PMIX_SUCCESS == pmix_lookup_errhandler (cd->err, &index)) {
|
if (PMIX_SUCCESS == pmix_lookup_errhandler(cd->err, &index)) {
|
||||||
/* complete request with error status and return its original reference */
|
/* complete request with error status and return its original reference */
|
||||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||||
"pmix_server_register_errhandler error - hdlr already registered index = %d",
|
"pmix_server_register_errhandler error - hdlr already registered index = %d",
|
||||||
index);
|
index);
|
||||||
cd->cbfunc.errregcbfn (PMIX_EXISTS, index, cd->cbdata);
|
cd->cbfunc.errregcbfn(PMIX_EXISTS, index, cd->cbdata);
|
||||||
} else {
|
} else {
|
||||||
rc = pmix_add_errhandler (cd->err, cd->info, cd->ninfo, &index);
|
rc = pmix_add_errhandler(cd->err, cd->info, cd->ninfo, &index);
|
||||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||||
"pmix_server_register_errhandler - success index =%d", index);
|
"pmix_server_register_errhandler - success index =%d", index);
|
||||||
cd->cbfunc.errregcbfn (rc, index, cd->cbdata);
|
cd->cbfunc.errregcbfn(rc, index, cd->cbdata);
|
||||||
}
|
}
|
||||||
cd->active = false;
|
cd->active = false;
|
||||||
PMIX_RELEASE(cd);
|
PMIX_RELEASE(cd);
|
||||||
@ -1250,6 +1282,7 @@ void pmix_server_register_errhandler(pmix_info_t info[], size_t ninfo,
|
|||||||
void *cbdata)
|
void *cbdata)
|
||||||
{
|
{
|
||||||
pmix_shift_caddy_t *cd;
|
pmix_shift_caddy_t *cd;
|
||||||
|
|
||||||
/* need to thread shift this request */
|
/* need to thread shift this request */
|
||||||
cd = PMIX_NEW(pmix_shift_caddy_t);
|
cd = PMIX_NEW(pmix_shift_caddy_t);
|
||||||
cd->info = info;
|
cd->info = info;
|
||||||
@ -1257,8 +1290,10 @@ void pmix_server_register_errhandler(pmix_info_t info[], size_t ninfo,
|
|||||||
cd->err = errhandler;
|
cd->err = errhandler;
|
||||||
cd->cbfunc.errregcbfn = cbfunc;
|
cd->cbfunc.errregcbfn = cbfunc;
|
||||||
cd->cbdata = cbdata;
|
cd->cbdata = cbdata;
|
||||||
|
|
||||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||||
"pmix_server_register_errhandler shifting to server thread");
|
"pmix_server_register_errhandler shifting to server thread");
|
||||||
|
|
||||||
PMIX_THREADSHIFT(cd, reg_errhandler);
|
PMIX_THREADSHIFT(cd, reg_errhandler);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1266,8 +1301,11 @@ static void dereg_errhandler(int sd, short args, void *cbdata)
|
|||||||
{
|
{
|
||||||
pmix_status_t rc;
|
pmix_status_t rc;
|
||||||
pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata;
|
pmix_shift_caddy_t *cd = (pmix_shift_caddy_t*)cbdata;
|
||||||
rc = pmix_remove_errhandler (cd->ref);
|
|
||||||
cd->cbfunc.opcbfn(rc, cd->cbdata);
|
rc = pmix_remove_errhandler(cd->ref);
|
||||||
|
if (NULL != cd->cbfunc.opcbfn) {
|
||||||
|
cd->cbfunc.opcbfn(rc, cd->cbdata);
|
||||||
|
}
|
||||||
cd->active = false;
|
cd->active = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1276,12 +1314,14 @@ void pmix_server_deregister_errhandler(int errhandler_ref,
|
|||||||
void *cbdata)
|
void *cbdata)
|
||||||
{
|
{
|
||||||
pmix_shift_caddy_t *cd;
|
pmix_shift_caddy_t *cd;
|
||||||
|
|
||||||
/* need to thread shift this request */
|
/* need to thread shift this request */
|
||||||
cd = PMIX_NEW(pmix_shift_caddy_t);
|
cd = PMIX_NEW(pmix_shift_caddy_t);
|
||||||
cd->cbfunc.opcbfn = cbfunc;
|
cd->cbfunc.opcbfn = cbfunc;
|
||||||
cd->cbdata = cbdata;
|
cd->cbdata = cbdata;
|
||||||
cd->ref = errhandler_ref;
|
cd->ref = errhandler_ref;
|
||||||
PMIX_THREADSHIFT(cd, dereg_errhandler);
|
PMIX_THREADSHIFT(cd, dereg_errhandler);
|
||||||
|
|
||||||
PMIX_WAIT_FOR_COMPLETION(cd->active);
|
PMIX_WAIT_FOR_COMPLETION(cd->active);
|
||||||
PMIX_RELEASE(cd);
|
PMIX_RELEASE(cd);
|
||||||
}
|
}
|
||||||
|
@ -165,8 +165,10 @@ void pmix_errhandler_invoke(pmix_status_t status,
|
|||||||
PMIX_INFO_CREATE(iptr, ninfo+1);
|
PMIX_INFO_CREATE(iptr, ninfo+1);
|
||||||
(void)strncpy(iptr[0].key, PMIX_ERROR_HANDLER_ID, PMIX_MAX_KEYLEN);
|
(void)strncpy(iptr[0].key, PMIX_ERROR_HANDLER_ID, PMIX_MAX_KEYLEN);
|
||||||
iptr[0].value.type = PMIX_INT;
|
iptr[0].value.type = PMIX_INT;
|
||||||
for (j=0; j < ninfo; j++) {
|
if (NULL != info) {
|
||||||
PMIX_INFO_LOAD(&iptr[j+1], info[j].key, &info[j].value.data, info[j].value.type);
|
for (j=0; j < ninfo; j++) {
|
||||||
|
PMIX_INFO_LOAD(&iptr[j+1], info[j].key, &info[j].value.data, info[j].value.type);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < pmix_globals.errregs.size; i++) {
|
for (i = 0; i < pmix_globals.errregs.size; i++) {
|
||||||
@ -221,9 +223,10 @@ pmix_status_t pmix_lookup_errhandler(pmix_notification_fn_t err,
|
|||||||
int i;
|
int i;
|
||||||
pmix_status_t rc = PMIX_ERR_NOT_FOUND;
|
pmix_status_t rc = PMIX_ERR_NOT_FOUND;
|
||||||
pmix_error_reg_info_t *errreg = NULL;
|
pmix_error_reg_info_t *errreg = NULL;
|
||||||
|
|
||||||
for (i = 0; i < pmix_pointer_array_get_size(&pmix_globals.errregs) ; i++) {
|
for (i = 0; i < pmix_pointer_array_get_size(&pmix_globals.errregs) ; i++) {
|
||||||
errreg = (pmix_error_reg_info_t*) pmix_pointer_array_get_item (&pmix_globals.errregs, i);
|
errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs, i);
|
||||||
if((NULL != errreg) && (err == errreg->errhandler)) {
|
if ((NULL != errreg) && (err == errreg->errhandler)) {
|
||||||
*index = i;
|
*index = i;
|
||||||
rc = PMIX_SUCCESS;
|
rc = PMIX_SUCCESS;
|
||||||
break;
|
break;
|
||||||
@ -238,19 +241,25 @@ pmix_status_t pmix_add_errhandler(pmix_notification_fn_t err,
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
pmix_status_t rc = PMIX_SUCCESS;
|
pmix_status_t rc = PMIX_SUCCESS;
|
||||||
pmix_error_reg_info_t *errreg = PMIX_NEW(pmix_error_reg_info_t);
|
pmix_error_reg_info_t *errreg;
|
||||||
|
|
||||||
|
errreg = PMIX_NEW(pmix_error_reg_info_t);
|
||||||
errreg->errhandler = err;
|
errreg->errhandler = err;
|
||||||
errreg->ninfo = ninfo;
|
errreg->ninfo = ninfo;
|
||||||
PMIX_INFO_CREATE(errreg->info, ninfo);
|
if (NULL != info && 0 < ninfo) {
|
||||||
for (i=0; i < ninfo; i++) {
|
PMIX_INFO_CREATE(errreg->info, ninfo);
|
||||||
memcpy(errreg->info[i].key, info[i].key, PMIX_MAX_KEYLEN);
|
for (i=0; i < ninfo; i++) {
|
||||||
pmix_value_xfer(&errreg->info[i].value, &info[i].value);
|
(void)strncpy(errreg->info[i].key, info[i].key, PMIX_MAX_KEYLEN);
|
||||||
|
pmix_value_xfer(&errreg->info[i].value, &info[i].value);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
*index = pmix_pointer_array_add (&pmix_globals.errregs, errreg);
|
*index = pmix_pointer_array_add(&pmix_globals.errregs, errreg);
|
||||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||||
"pmix_add_errhandler index =%d", *index);
|
"pmix_add_errhandler index =%d", *index);
|
||||||
if (-1 == *index)
|
if (*index < 0) {
|
||||||
|
PMIX_RELEASE(errreg);
|
||||||
rc = PMIX_ERROR;
|
rc = PMIX_ERROR;
|
||||||
|
}
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -258,16 +267,19 @@ pmix_status_t pmix_remove_errhandler(int errhandler_ref)
|
|||||||
{
|
{
|
||||||
int rc = PMIX_SUCCESS;
|
int rc = PMIX_SUCCESS;
|
||||||
pmix_error_reg_info_t *errreg;
|
pmix_error_reg_info_t *errreg;
|
||||||
errreg = (pmix_error_reg_info_t*) pmix_pointer_array_get_item (&pmix_globals.errregs,
|
|
||||||
errhandler_ref);
|
errreg = (pmix_error_reg_info_t*)pmix_pointer_array_get_item(&pmix_globals.errregs,
|
||||||
if (NULL != errreg)
|
errhandler_ref);
|
||||||
|
if (NULL != errreg) {
|
||||||
PMIX_RELEASE(errreg);
|
PMIX_RELEASE(errreg);
|
||||||
else
|
pmix_pointer_array_set_item(&pmix_globals.errregs, errhandler_ref, NULL);
|
||||||
|
} else {
|
||||||
rc = PMIX_ERR_NOT_FOUND;
|
rc = PMIX_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
void pmix_get_errorgroup ( pmix_status_t status, char *pmix_error_group)
|
void pmix_get_errorgroup(pmix_status_t status, char *pmix_error_group)
|
||||||
{
|
{
|
||||||
switch(status) {
|
switch(status) {
|
||||||
case PMIX_ERR_UNREACH:
|
case PMIX_ERR_UNREACH:
|
||||||
@ -276,24 +288,24 @@ void pmix_get_errorgroup ( pmix_status_t status, char *pmix_error_group)
|
|||||||
case PMIX_ERR_TIMEOUT:
|
case PMIX_ERR_TIMEOUT:
|
||||||
case PMIX_ERR_PACK_FAILURE:
|
case PMIX_ERR_PACK_FAILURE:
|
||||||
case PMIX_ERR_UNPACK_FAILURE:
|
case PMIX_ERR_UNPACK_FAILURE:
|
||||||
strcpy(pmix_error_group, PMIX_ERROR_GROUP_COMM);
|
(void)strncpy(pmix_error_group, PMIX_ERROR_GROUP_COMM, PMIX_MAX_KEYLEN);
|
||||||
break;
|
break;
|
||||||
case PMIX_ERR_OUT_OF_RESOURCE:
|
case PMIX_ERR_OUT_OF_RESOURCE:
|
||||||
case PMIX_ERR_RESOURCE_BUSY:
|
case PMIX_ERR_RESOURCE_BUSY:
|
||||||
case PMIX_ERR_NOMEM:
|
case PMIX_ERR_NOMEM:
|
||||||
strcpy(pmix_error_group, PMIX_ERROR_GROUP_RESOURCE);
|
(void)strncpy(pmix_error_group, PMIX_ERROR_GROUP_RESOURCE, PMIX_MAX_KEYLEN);
|
||||||
break;
|
break;
|
||||||
case PMIX_ERR_PROC_MIGRATE:
|
case PMIX_ERR_PROC_MIGRATE:
|
||||||
case PMIX_ERR_PROC_CHECKPOINT:
|
case PMIX_ERR_PROC_CHECKPOINT:
|
||||||
case PMIX_ERR_PROC_RESTART:
|
case PMIX_ERR_PROC_RESTART:
|
||||||
strcpy(pmix_error_group, PMIX_ERROR_GROUP_MIGRATE);
|
(void)strncpy(pmix_error_group, PMIX_ERROR_GROUP_MIGRATE, PMIX_MAX_KEYLEN);
|
||||||
break;
|
break;
|
||||||
case PMIX_ERR_PROC_ABORTING:
|
case PMIX_ERR_PROC_ABORTING:
|
||||||
case PMIX_ERR_PROC_REQUESTED_ABORT:
|
case PMIX_ERR_PROC_REQUESTED_ABORT:
|
||||||
case PMIX_ERR_PROC_ABORTED:
|
case PMIX_ERR_PROC_ABORTED:
|
||||||
strcpy(pmix_error_group, PMIX_ERROR_GROUP_ABORT);
|
(void)strncpy(pmix_error_group, PMIX_ERROR_GROUP_ABORT, PMIX_MAX_KEYLEN);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
strcpy(pmix_error_group, PMIX_ERROR_GROUP_GENERAL);
|
(void)strncpy(pmix_error_group, PMIX_ERROR_GROUP_GENERAL, PMIX_MAX_KEYLEN);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -970,11 +970,6 @@ void mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata)
|
|||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
ORTE_NAME_PRINT(&pop->peer));
|
ORTE_NAME_PRINT(&pop->peer));
|
||||||
|
|
||||||
/* if we are terminating, or recovery isn't enabled, then don't attempt to reconnect */
|
|
||||||
if (!orte_enable_recovery || orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered) {
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Mark that we no longer support this peer */
|
/* Mark that we no longer support this peer */
|
||||||
memcpy(&ui64, (char*)&pop->peer, sizeof(uint64_t));
|
memcpy(&ui64, (char*)&pop->peer, sizeof(uint64_t));
|
||||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
|
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
|
||||||
@ -982,19 +977,19 @@ void mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata)
|
|||||||
bpr = OBJ_NEW(orte_oob_base_peer_t);
|
bpr = OBJ_NEW(orte_oob_base_peer_t);
|
||||||
}
|
}
|
||||||
opal_bitmap_clear_bit(&bpr->addressable, mca_oob_tcp_component.super.idx);
|
opal_bitmap_clear_bit(&bpr->addressable, mca_oob_tcp_component.super.idx);
|
||||||
if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
|
if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
|
||||||
ui64, NULL))) {
|
ui64, NULL))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
cleanup:
|
if (!orte_finalizing) {
|
||||||
/* activate the proc state */
|
/* activate the proc state */
|
||||||
if (ORTE_SUCCESS != orte_routed.route_lost(&pop->peer)) {
|
if (ORTE_SUCCESS != orte_routed.route_lost(&pop->peer)) {
|
||||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_LIFELINE_LOST);
|
ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_LIFELINE_LOST);
|
||||||
} else {
|
} else {
|
||||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_COMM_FAILED);
|
ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_COMM_FAILED);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
OBJ_RELEASE(pop);
|
OBJ_RELEASE(pop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user