Fix the intercommunicator issue reported by Gilles. Instead of directly checking the reachability bitmap, ask the component if the proc is reachable when doing a send, as the component is the final arbiter in such cases. Recirculate any messages that a daemon is trying to send to avoid race conditions. Clean up listener sockets so we don't leak them.
Этот коммит содержится в:
родитель
8497a6a140
Коммит
3cee4152fc
@ -120,6 +120,13 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata)
|
||||
}
|
||||
/* if nobody could reach it, then that's an error */
|
||||
if (!reachable) {
|
||||
/* if we are a daemon or HNP, then it could be that
|
||||
* this is a local proc we just haven't heard from
|
||||
* yet due to a race condition. Check that situation */
|
||||
if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
|
||||
ORTE_OOB_SEND(msg);
|
||||
return;
|
||||
}
|
||||
msg->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
|
||||
ORTE_RML_SEND_COMPLETE(msg);
|
||||
return;
|
||||
@ -150,8 +157,8 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata)
|
||||
msg_sent = false;
|
||||
OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) {
|
||||
component = (mca_oob_base_component_t*)cli->cli_component;
|
||||
/* is this peer addressable by this component? */
|
||||
if (!opal_bitmap_is_set_bit(&pr->addressable, component->idx)) {
|
||||
/* is this peer reachable via this component? */
|
||||
if (!component->is_reachable(&msg->dst)) {
|
||||
continue;
|
||||
}
|
||||
/* it is addressable, so attempt to send via that transport */
|
||||
|
@ -876,6 +876,7 @@ static void connection_event_handler(int incoming_sd, short flags, void* cbdata)
|
||||
static void tcp_ev_cons(mca_oob_tcp_listener_t* event)
|
||||
{
|
||||
event->ev_active = false;
|
||||
event->sd = -1;
|
||||
}
|
||||
static void tcp_ev_des(mca_oob_tcp_listener_t* event)
|
||||
{
|
||||
@ -883,6 +884,10 @@ static void tcp_ev_des(mca_oob_tcp_listener_t* event)
|
||||
opal_event_del(&event->event);
|
||||
}
|
||||
event->ev_active = false;
|
||||
if (0 <= event->sd) {
|
||||
CLOSE_THE_SOCKET(event->sd);
|
||||
event->sd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_oob_tcp_listener_t,
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user