1
1

Fix the intercommunicator issue reported by Gilles. Instead of directly checking the reachability bitmap, ask the component if the proc is reachable when doing a send, as the component is the final arbiter in such cases. Recirculate any messages that a daemon is trying to send to avoid race conditions. Clean up listener sockets so we don't leak them.

Этот коммит содержится в:
Ralph Castain 2015-05-11 09:16:25 -07:00
родитель 8497a6a140
Коммит 3cee4152fc
2 изменённых файлов: 14 добавлений и 2 удалений

Просмотреть файл

@ -120,6 +120,13 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata)
}
/* if nobody could reach it, then that's an error */
if (!reachable) {
/* if we are a daemon or HNP, then it could be that
* this is a local proc we just haven't heard from
* yet due to a race condition. Check that situation */
if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
ORTE_OOB_SEND(msg);
return;
}
msg->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
ORTE_RML_SEND_COMPLETE(msg);
return;
@ -150,8 +157,8 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata)
msg_sent = false;
OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) {
component = (mca_oob_base_component_t*)cli->cli_component;
/* is this peer addressable by this component? */
if (!opal_bitmap_is_set_bit(&pr->addressable, component->idx)) {
/* is this peer reachable via this component? */
if (!component->is_reachable(&msg->dst)) {
continue;
}
/* it is addressable, so attempt to send via that transport */

Просмотреть файл

@ -876,6 +876,7 @@ static void connection_event_handler(int incoming_sd, short flags, void* cbdata)
/*
 * Initialize a listener object to its idle state: no socket descriptor
 * held and the event flagged as not active.
 * NOTE(review): presumably registered as the class constructor through the
 * OBJ_CLASS_INSTANCE() invocation that follows -- confirm against that macro call.
 */
static void tcp_ev_cons(mca_oob_tcp_listener_t* event)
{
    /* -1 means "no socket"; tcp_ev_des only closes sd when it is >= 0 */
    event->sd = -1;
    /* the listener starts out with its event marked inactive */
    event->ev_active = false;
}
static void tcp_ev_des(mca_oob_tcp_listener_t* event)
{
@ -883,6 +884,10 @@ static void tcp_ev_des(mca_oob_tcp_listener_t* event)
opal_event_del(&event->event);
}
event->ev_active = false;
if (0 <= event->sd) {
CLOSE_THE_SOCKET(event->sd);
event->sd = -1;
}
}
OBJ_CLASS_INSTANCE(mca_oob_tcp_listener_t,