1
1

Track down the last piece of the connection problem. It appears that
providing a netmask of 0 to opal_net_samenetwork results in everything
looking like it is on the same network. Hence, we were not retaining any
of the alternative addresses, so we had no other way to check them.

Refs trac:4870

This commit was SVN r32556.

The following Trac tickets were found above:
  Ticket 4870 --> https://svn.open-mpi.org/trac/ompi/ticket/4870
Этот коммит содержится в:
Ralph Castain 2014-08-20 16:55:36 +00:00
родитель 3c1944054e
Коммит fa28710d53
2 изменённых файла: 8 добавлений и 1 удаление

Просмотреть файл

@ -282,6 +282,8 @@ static void process_set_peer(int fd, short args, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
if (AF_INET != pop->af_family) {
opal_output_verbose(20, orte_oob_base_framework.framework_output,
"%s NOT AF_INET", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
goto cleanup;
}
@ -306,8 +308,12 @@ static void process_set_peer(int fd, short args, void *cbdata)
/* do we already have this address? */
OPAL_LIST_FOREACH(maddr, &peer->addrs, mca_oob_tcp_addr_t) {
if (opal_net_samenetwork(&inaddr, (struct sockaddr*)&maddr->addr, 0)) {
/* require only that the subnet be the same */
if (opal_net_samenetwork(&inaddr, (struct sockaddr*)&maddr->addr, 24)) {
/* yes - can ignore this address */
opal_output_verbose(20, orte_oob_base_framework.framework_output,
"%s SAME NETWORK",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
goto cleanup;
}
}

Просмотреть файл

@ -795,6 +795,7 @@ void mca_oob_tcp_peer_close(mca_oob_tcp_peer_t *peer)
/* release the socket */
close(peer->sd);
peer->sd = -1;
/* if we were CONNECTING, then we need to mark the address as
* failed and cycle back to try the next address */