1
1

Stop all progress threads prior to releasing the peer objects to avoid a race condition whereby a lost connection could be reported after a peer object was freed and before the threads were stopped.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-06-29 15:48:18 -07:00
родитель 7e5e5fe887
Коммит 85f8eb4c6b

Просмотреть файл

@@ -698,24 +698,14 @@ static int component_startup(void)
static void component_shutdown(void)
{
mca_oob_tcp_peer_t *peer;
uint64_t ui64;
int i = 0;
int i = 0, rc;
uint64_t key;
void *node;
opal_output_verbose(2, orte_oob_base_framework.framework_output,
"%s TCP SHUTDOWN",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* cleanup all peers */
OPAL_HASH_TABLE_FOREACH(ui64, uint64, peer, &mca_oob_tcp_component.peers) {
opal_output_verbose(2, orte_oob_base_framework.framework_output,
"%s RELEASING PEER OBJ %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == peer) ? "NULL" : ORTE_NAME_PRINT(&peer->name));
if (NULL != peer) {
OBJ_RELEASE(peer);
}
}
if (0 < orte_oob_base.num_threads) {
for (i=0; i < orte_oob_base.num_threads; i++) {
opal_progress_thread_finalize(mca_oob_tcp_component.ev_threads[i]);
@@ -734,6 +724,18 @@ static void component_shutdown(void)
"no hnp or not active");
}
/* release all peers from the hash table */
rc = opal_hash_table_get_first_key_uint64(&mca_oob_tcp_component.peers, &key,
(void **)&peer, &node);
while (OPAL_SUCCESS == rc) {
if (NULL != peer) {
OBJ_RELEASE(peer);
opal_hash_table_set_value_uint64(&mca_oob_tcp_component.peers, key, NULL);
}
rc = opal_hash_table_get_next_key_uint64(&mca_oob_tcp_component.peers, &key,
(void **) &peer, node, &node);
}
opal_output_verbose(2, orte_oob_base_framework.framework_output,
"%s TCP SHUTDOWN done",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));