1
1

Merge pull request #4930 from kizill/fix-ipv6

fixed ipv6 OOB connection problems (fix issue #1585)
Этот коммит содержится в:
Ralph Castain 2018-06-26 09:13:53 -07:00 коммит произвёл GitHub
родитель abb87f9137 c2bfca19ba
Коммит 0ddbc75ce5
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 51 добавление и 24 удаления

Просмотреть файл

@ -873,6 +873,10 @@ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr
tmp[1], INET6_ADDRSTRLEN),
(int)i, (int)btl_proc->proc_endpoint_count);
continue;
} else if (btl_endpoint->endpoint_state != MCA_BTL_TCP_CLOSED) {
found_match = 1;
match_btl_endpoint = btl_endpoint;
continue;
}
break;
#endif

Просмотреть файл

@ -118,8 +118,8 @@ static int if_linux_ipv6_open(void)
addrbyte[8], addrbyte[9], addrbyte[10], addrbyte[11],
addrbyte[12], addrbyte[13], addrbyte[14], addrbyte[15], scope);
/* we don't want any other scope less than link-local */
if (scope < 0x20) {
/* Only interested in global (0x00) scope */
if (scope != 0x00) {
opal_output_verbose(1, opal_if_base_framework.framework_output,
"skipping interface %2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x:%2x%2x scope %x\n",
addrbyte[0], addrbyte[1], addrbyte[2], addrbyte[3],

Просмотреть файл

@ -52,6 +52,8 @@
#include <netdb.h>
#endif
#include <ctype.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include "opal/util/show_help.h"
#include "opal/util/error.h"
@ -84,6 +86,8 @@
#include "orte/mca/oob/tcp/oob_tcp_peer.h"
#include "orte/mca/oob/tcp/oob_tcp_connection.h"
#include "orte/mca/oob/tcp/oob_tcp_listener.h"
#include "oob_tcp_peer.h"
/*
* Local utility functions
*/
@ -843,6 +847,8 @@ static int parse_uri(const uint16_t af_family,
opal_output (0, "oob_tcp_parse_uri: Could not convert %s\n", host);
return ORTE_ERR_BAD_PARAM;
}
in6->sin6_family = AF_INET6;
in6->sin6_port = htons(atoi(port));
}
#endif
else {
@ -973,6 +979,7 @@ static int component_set_addr(orte_process_name_t *peer,
}
maddr = OBJ_NEW(mca_oob_tcp_addr_t);
((struct sockaddr_storage*) &(maddr->addr))->ss_family = af_family;
if (ORTE_SUCCESS != (rc = parse_uri(af_family, host, ports, (struct sockaddr_storage*) &(maddr->addr)))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(maddr);

Просмотреть файл

@ -30,6 +30,8 @@
#include <unistd.h>
#endif
#include <fcntl.h>
#include <sys/socket.h>
#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif
@ -77,6 +79,9 @@
#include "orte/mca/oob/tcp/oob_tcp_peer.h"
#include "orte/mca/oob/tcp/oob_tcp_common.h"
#include "orte/mca/oob/tcp/oob_tcp_connection.h"
#include "oob_tcp_peer.h"
#include "oob_tcp_common.h"
#include "oob_tcp_connection.h"
static void tcp_peer_event_init(mca_oob_tcp_peer_t* peer);
static int tcp_peer_send_connect_ack(mca_oob_tcp_peer_t* peer);
@ -86,7 +91,7 @@ static bool tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, int sd,
void* data, size_t size);
static void tcp_peer_connected(mca_oob_tcp_peer_t* peer);
static int tcp_peer_create_socket(mca_oob_tcp_peer_t* peer)
static int tcp_peer_create_socket(mca_oob_tcp_peer_t* peer, sa_family_t family)
{
int flags;
@ -98,8 +103,7 @@ static int tcp_peer_create_socket(mca_oob_tcp_peer_t* peer)
"%s oob:tcp:peer creating socket to %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer->name))));
peer->sd = socket(AF_INET, SOCK_STREAM, 0);
peer->sd = socket(family, SOCK_STREAM, 0);
if (peer->sd < 0) {
opal_output(0, "%s-%s tcp_peer_create_socket: socket() failed: %s (%d)\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@ -155,6 +159,7 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
{
mca_oob_tcp_conn_op_t *op = (mca_oob_tcp_conn_op_t*)cbdata;
mca_oob_tcp_peer_t *peer;
int current_socket_family = 0;
int rc;
opal_socklen_t addrlen = 0;
mca_oob_tcp_addr_t *addr;
@ -171,30 +176,12 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer->name)));
rc = tcp_peer_create_socket(peer);
if (ORTE_SUCCESS != rc) {
/* FIXME: we cannot create a TCP socket - this spans
* all interfaces, so all we can do is report
* back to the component that this peer is
* unreachable so it can remove the peer
* from its list and report back to the base
* NOTE: this could be a reconnect attempt,
* so we also need to mark any queued messages
* and return them as "unreachable"
*/
opal_output(0, "%s CANNOT CREATE SOCKET", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
ORTE_FORCED_TERMINATE(1);
OBJ_RELEASE(op);
return;
}
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
"%s orte_tcp_peer_try_connect: "
"attempting to connect to proc %s on socket %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&(peer->name)), peer->sd);
addrlen = sizeof(struct sockaddr_in);
peer->active_addr = NULL;
OPAL_LIST_FOREACH(addr, &peer->addrs, mca_oob_tcp_addr_t) {
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
@ -222,9 +209,36 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
continue;
}
peer->active_addr = addr; // record the one we are using
addrlen = addr->addr.ss_family == AF_INET6 ? sizeof(struct sockaddr_in6)
: sizeof(struct sockaddr_in);
if (addr->addr.ss_family != current_socket_family) {
if (peer->sd >= 0) {
CLOSE_THE_SOCKET(peer->sd);
peer->sd = -1;
}
rc = tcp_peer_create_socket(peer, addr->addr.ss_family);
current_socket_family = addr->addr.ss_family;
if (ORTE_SUCCESS != rc) {
/* FIXME: we cannot create a TCP socket - this spans
* all interfaces, so all we can do is report
* back to the component that this peer is
* unreachable so it can remove the peer
* from its list and report back to the base
* NOTE: this could be a reconnect attempt,
* so we also need to mark any queued messages
* and return them as "unreachable"
*/
opal_output(0, "%s CANNOT CREATE SOCKET", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
ORTE_FORCED_TERMINATE(1);
goto cleanup;
}
}
retry_connect:
addr->retries++;
if (connect(peer->sd, (struct sockaddr*)&addr->addr, addrlen) < 0) {
rc = connect(peer->sd, (struct sockaddr*) &addr->addr, addrlen);
if (rc < 0) {
/* non-blocking so wait for completion */
if (opal_socket_errno == EINPROGRESS || opal_socket_errno == EWOULDBLOCK) {
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,

Просмотреть файл

@ -277,6 +277,7 @@ static int create_listen(void)
}
/* get the address info for this interface */
memset(&inaddr, 0, sizeof(inaddr));
((struct sockaddr_in*) &inaddr)->sin_family = AF_INET;
((struct sockaddr_in*) &inaddr)->sin_addr.s_addr = INADDR_ANY;
addrlen = sizeof(struct sockaddr_in);
@ -529,6 +530,7 @@ static int create_listen6(void)
}
/* get the address info for this interface */
memset(&inaddr, 0, sizeof(inaddr));
((struct sockaddr_in6*) &inaddr)->sin6_family = AF_INET6;
((struct sockaddr_in6*) &inaddr)->sin6_addr = in6addr_any;
addrlen = sizeof(struct sockaddr_in6);