1
1

Add support for TCP keepalive on inter-node sockets

Этот коммит содержится в:
Ralph Castain 2015-03-16 09:59:44 -07:00
родитель 0cfb4f29aa
Коммит 69ac25bf55
3 изменённых файла: 122 добавления и 17 удалений

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -42,32 +42,28 @@
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#ifdef HAVE_NETINET_TCP_H
#include <netinet/tcp.h>
#endif
#ifdef HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#include <ctype.h>
#include "opal/util/show_help.h"
#include "opal/util/error.h"
#include "opal/util/output.h"
#include "opal/opal_socket_errno.h"
#include "opal/util/if.h"
#include "opal/util/net.h"
#include "opal/util/argv.h"
#include "opal/class/opal_hash_table.h"
#include "opal/class/opal_list.h"
#include "opal/mca/backtrace/backtrace.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ess/ess.h"
#include "orte/util/name_fns.h"
#include "orte/util/parse_options.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/oob/tcp/oob_tcp.h"
#include "orte/mca/oob/tcp/oob_tcp_component.h"
#include "oob_tcp_peer.h"
@ -82,7 +78,7 @@ void orte_oob_tcp_set_socket_options(int sd)
#if defined(TCP_NODELAY)
int optval;
optval = 1;
if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) {
if (setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) {
opal_backtrace_print(stderr, NULL, 1);
opal_output(0, "[%s:%d] setsockopt(TCP_NODELAY) failed: %s (%d)",
__FILE__, __LINE__,
@ -91,8 +87,8 @@ void orte_oob_tcp_set_socket_options(int sd)
}
#endif
#if defined(SO_SNDBUF)
if(mca_oob_tcp_component.tcp_sndbuf > 0 &&
setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&mca_oob_tcp_component.tcp_sndbuf, sizeof(int)) < 0) {
if (mca_oob_tcp_component.tcp_sndbuf > 0 &&
setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&mca_oob_tcp_component.tcp_sndbuf, sizeof(int)) < 0) {
opal_output(0, "[%s:%d] setsockopt(SO_SNDBUF) failed: %s (%d)",
__FILE__, __LINE__,
strerror(opal_socket_errno),
@ -100,14 +96,88 @@ void orte_oob_tcp_set_socket_options(int sd)
}
#endif
#if defined(SO_RCVBUF)
if(mca_oob_tcp_component.tcp_rcvbuf > 0 &&
setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&mca_oob_tcp_component.tcp_rcvbuf, sizeof(int)) < 0) {
if (mca_oob_tcp_component.tcp_rcvbuf > 0 &&
setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&mca_oob_tcp_component.tcp_rcvbuf, sizeof(int)) < 0) {
opal_output(0, "[%s:%d] setsockopt(SO_RCVBUF) failed: %s (%d)",
__FILE__, __LINE__,
strerror(opal_socket_errno),
opal_socket_errno);
}
#endif
#if defined(SO_KEEPALIVE)
    /* Enable TCP keepalive on this socket, but only when
     * mca_oob_tcp_component.keepalive_time is positive. Each tuning
     * knob (idle time, probe interval, probe count) is applied only if
     * the platform defines the corresponding TCP_* option. */
    if (0 < mca_oob_tcp_component.keepalive_time) {
        int option;
        socklen_t optlen;

        /* see if the keepalive option is available */
        optlen = sizeof(option);
        if (getsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, &optlen) < 0) {
            /* not available, so just return */
            return;
        }

        /* Set the option active. Pass sizeof(option) rather than optlen,
         * since getsockopt() may have rewritten optlen above. */
        option = 1;
        if (setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, sizeof(option)) < 0) {
            opal_output(0, "[%s:%d] setsockopt(SO_KEEPALIVE) failed: %s (%d)",
                        __FILE__, __LINE__,
                        strerror(opal_socket_errno),
                        opal_socket_errno);
            return;
        }

        if (mca_oob_tcp_component.tcp_proto < 0) {
            /* we don't know the TCP protocol number, so the TCP-level
             * tuning options below cannot be set */
            return;
        }

#if defined(TCP_KEEPALIVE)
        /* set the idle time */
        if (setsockopt(sd, mca_oob_tcp_component.tcp_proto, TCP_KEEPALIVE,
                       &mca_oob_tcp_component.keepalive_time,
                       sizeof(mca_oob_tcp_component.keepalive_time)) < 0) {
            opal_output(0, "[%s:%d] setsockopt(TCP_KEEPALIVE) failed: %s (%d)",
                        __FILE__, __LINE__,
                        strerror(opal_socket_errno),
                        opal_socket_errno);
            return;
        }
#elif defined(TCP_KEEPIDLE)
        /* set the idle time */
        if (setsockopt(sd, mca_oob_tcp_component.tcp_proto, TCP_KEEPIDLE,
                       &mca_oob_tcp_component.keepalive_time,
                       sizeof(mca_oob_tcp_component.keepalive_time)) < 0) {
            opal_output(0, "[%s:%d] setsockopt(TCP_KEEPIDLE) failed: %s (%d)",
                        __FILE__, __LINE__,
                        strerror(opal_socket_errno),
                        opal_socket_errno);
            return;
        }
#endif /* TCP_KEEPALIVE / TCP_KEEPIDLE */

#if defined(TCP_KEEPINTVL)
        /* set the keepalive interval */
        if (setsockopt(sd, mca_oob_tcp_component.tcp_proto, TCP_KEEPINTVL,
                       &mca_oob_tcp_component.keepalive_intvl,
                       sizeof(mca_oob_tcp_component.keepalive_intvl)) < 0) {
            opal_output(0, "[%s:%d] setsockopt(TCP_KEEPINTVL) failed: %s (%d)",
                        __FILE__, __LINE__,
                        strerror(opal_socket_errno),
                        opal_socket_errno);
            return;
        }
#endif /* TCP_KEEPINTVL */

#if defined(TCP_KEEPCNT)
        /* set the miss rate */
        if (setsockopt(sd, mca_oob_tcp_component.tcp_proto, TCP_KEEPCNT,
                       &mca_oob_tcp_component.keepalive_probes,
                       sizeof(mca_oob_tcp_component.keepalive_probes)) < 0) {
            opal_output(0, "[%s:%d] setsockopt(TCP_KEEPCNT) failed: %s (%d)",
                        __FILE__, __LINE__,
                        strerror(opal_socket_errno),
                        opal_socket_errno);
        }
#endif /* TCP_KEEPCNT */
    }   /* closes the keepalive_time check; kept outside the TCP_KEEPCNT
         * guard so braces balance when TCP_KEEPCNT is undefined */
#endif /* SO_KEEPALIVE */
}
mca_oob_tcp_peer_t* mca_oob_tcp_peer_lookup(const orte_process_name_t *name)

Просмотреть файл

@ -410,6 +410,37 @@ static int tcp_component_register(void)
&mca_oob_tcp_component.disable_ipv6_family);
#endif
mca_oob_tcp_component.keepalive_time = -1;
(void)mca_base_component_var_register(component, "keepalive_time",
"Idle time in seconds before starting to send keepalives (num <= 0 => disable keepalive)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_oob_tcp_component.keepalive_time);
if (0 < mca_oob_tcp_component.keepalive_time) {
struct protoent *proto;
if (NULL != (proto = getprotobyname("TCP"))) {
mca_oob_tcp_component.tcp_proto = proto->p_proto;
} else {
mca_oob_tcp_component.tcp_proto = -1;
}
}
mca_oob_tcp_component.keepalive_intvl = 5;
(void)mca_base_component_var_register(component, "keepalive_intvl",
"Time between keepalives, in seconds",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_oob_tcp_component.keepalive_intvl);
mca_oob_tcp_component.keepalive_probes = 3;
(void)mca_base_component_var_register(component, "keepalive_probes",
"Number of keepalives that can be missed before declaring error",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_oob_tcp_component.keepalive_probes);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -77,6 +77,10 @@ typedef struct {
bool listen_thread_active;
struct timeval listen_thread_tv; /**< Timeout when using listen thread */
int stop_thread[2]; /**< pipe used to exit the listen thread */
int keepalive_probes; /**< number of keepalives that can be missed before declaring error */
int keepalive_time; /**< idle time in seconds before starting to send keepalives */
int keepalive_intvl; /**< time between keepalives, in seconds */
int tcp_proto; /**< TCP protocol number */
} mca_oob_tcp_component_t;
ORTE_MODULE_DECLSPEC extern mca_oob_tcp_component_t mca_oob_tcp_component;