1
1

Revert "Standardize the handling of shutdown in the OOB TCP component"

This reverts commit open-mpi/ompi@12dccaa911.
Этот коммит содержится в:
Ralph Castain 2015-12-30 07:04:10 -08:00
родитель a04f1cd643
Коммит 1cdc1c121c
2 изменённых файлов: 25 добавлений и 22 удалений

Просмотреть файл

@ -970,7 +970,10 @@ void mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&pop->peer));
MCA_OOB_TCP_CHECK_SHUTDOWN(pop);
/* if we are terminating, or recovery isn't enabled, then don't attempt to reconnect */
if (!orte_enable_recovery || orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered) {
goto cleanup;
}
/* Mark that we no longer support this peer */
memcpy(&ui64, (char*)&pop->peer, sizeof(uint64_t));
@ -984,6 +987,7 @@ void mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata)
ORTE_ERROR_LOG(rc);
}
cleanup:
/* activate the proc state */
if (ORTE_SUCCESS != orte_routed.route_lost(&pop->peer)) {
ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_LIFELINE_LOST);
@ -1006,8 +1010,6 @@ void mca_oob_tcp_component_no_route(int fd, short args, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&mop->hop));
MCA_OOB_TCP_CHECK_SHUTDOWN(mop);
/* mark that we cannot reach this hop */
memcpy(&ui64, (char*)&(mop->hop), sizeof(uint64_t));
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
@ -1020,11 +1022,16 @@ void mca_oob_tcp_component_no_route(int fd, short args, void *cbdata)
ORTE_ERROR_LOG(rc);
}
/* if this was a lifeline, then alert */
if (ORTE_SUCCESS != orte_routed.route_lost(&mop->hop)) {
ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_LIFELINE_LOST);
} else {
ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_COMM_FAILED);
/* report the error back to the OOB and let it try other components
* or declare a problem
*/
if (!orte_finalizing && !orte_abnormal_term_ordered) {
/* if this was a lifeline, then alert */
if (ORTE_SUCCESS != orte_routed.route_lost(&mop->hop)) {
ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_LIFELINE_LOST);
} else {
ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_COMM_FAILED);
}
}
OBJ_RELEASE(mop);
@ -1042,7 +1049,11 @@ void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&mop->hop));
MCA_OOB_TCP_CHECK_SHUTDOWN(mop);
if (orte_finalizing || orte_abnormal_term_ordered) {
/* just ignore the problem */
OBJ_RELEASE(mop);
return;
}
/* mark that this component cannot reach this hop */
memcpy(&ui64, (char*)&(mop->hop), sizeof(uint64_t));
@ -1110,7 +1121,11 @@ void mca_oob_tcp_component_failed_to_connect(int fd, short args, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&pop->peer));
MCA_OOB_TCP_CHECK_SHUTDOWN(pop);
/* if we are terminating, then don't attempt to reconnect */
if (orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered) {
OBJ_RELEASE(pop);
return;
}
/* activate the proc state */
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,

Просмотреть файл

@ -92,16 +92,4 @@ ORTE_MODULE_DECLSPEC void mca_oob_tcp_component_failed_to_connect(int fd, short
ORTE_MODULE_DECLSPEC void mca_oob_tcp_component_no_route(int fd, short args, void *cbdata);
ORTE_MODULE_DECLSPEC void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata);
/* Provide a macro for handling errors reported during shutdown.
 *
 * If recovery is not enabled, or any of the termination/finalize flags
 * tested below is set, the object passed as (a) is released and the
 * CALLING function returns immediately.  Because the expansion contains
 * a bare "return;", this macro may only be used inside functions that
 * return void.
 *
 * The expansion intentionally does NOT end in a semicolon: the caller
 * supplies it.  That keeps the macro a single statement, so it remains
 * safe inside an un-braced if/else body.
 */
#define MCA_OOB_TCP_CHECK_SHUTDOWN(a)       \
    do {                                    \
        if (!orte_enable_recovery ||        \
            orte_orteds_term_ordered ||     \
            orte_finalizing ||              \
            orte_abnormal_term_ordered) {   \
            OBJ_RELEASE(a);                 \
            return;                         \
        }                                   \
    } while(0)
#endif /* _MCA_OOB_TCP_COMPONENT_H_ */