From 8f34329efe95b24ca2e492085e141ed86a677c6b Mon Sep 17 00:00:00 2001 From: Karol Mroz Date: Wed, 4 May 2016 08:14:17 +0200 Subject: [PATCH 1/4] configure: add ethtool and related checks Signed-off-by: Karol Mroz --- configure.ac | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index b965d55b20..158fae0abf 100644 --- a/configure.ac +++ b/configure.ac @@ -577,7 +577,8 @@ AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \ dlfcn.h execinfo.h err.h fcntl.h grp.h libgen.h \ libutil.h memory.h netdb.h netinet/in.h netinet/tcp.h \ poll.h pthread.h pty.h pwd.h sched.h \ - strings.h stropts.h sys/fcntl.h sys/ipc.h sys/shm.h \ + strings.h stropts.h linux/ethtool.h linux/sockios.h \ + net/if.h sys/fcntl.h sys/ipc.h sys/shm.h \ sys/ioctl.h sys/mman.h sys/param.h sys/queue.h \ sys/resource.h sys/select.h sys/socket.h sys/sockio.h \ sys/stat.h sys/statfs.h sys/statvfs.h sys/time.h sys/tree.h \ @@ -628,13 +629,28 @@ AC_CACHE_SAVE opal_show_title "Type tests" AC_CHECK_TYPES([socklen_t, struct sockaddr_in, struct sockaddr_in6, - struct sockaddr_storage], + struct sockaddr_storage, struct ifreq, struct ethtool_cmd], [], [], [AC_INCLUDES_DEFAULT #if HAVE_SYS_SOCKET_H #include #endif #ifdef HAVE_NETINET_IN_H #include +#endif +#ifdef HAVE_LINUX_ETHTOOL_H +#include +#endif +#ifdef HAVE_NET_IF_H +#include +#endif]) + +AC_CHECK_DECLS([ethtool_cmd_speed, SIOCETHTOOL], + [], [], [AC_INCLUDES_DEFAULT +#ifdef HAVE_LINUX_ETHTOOL_H +#include +#endif +#ifdef HAVE_LINUX_SOCKIOS_H +#include #endif]) AC_CHECK_DECLS([AF_UNSPEC, PF_UNSPEC, AF_INET6, PF_INET6], From 31e33a64f951b0fb342f593fe0a2c8c31eb4475f Mon Sep 17 00:00:00 2001 From: Karol Mroz Date: Wed, 4 May 2016 08:21:25 +0200 Subject: [PATCH 2/4] opal/util: add function to obtain interface speed If kernel ethtool_cmd_speed() is not available, use copies if possible. 
Signed-off-by: Karol Mroz
---
 opal/util/Makefile.am |   2 ++
 opal/util/ethtool.c   | 104 +++++++++++++++++++++++++++++++++++++++++++
 opal/util/ethtool.h   |  20 +++++++++++
 3 files changed, 126 insertions(+)
 create mode 100644 opal/util/ethtool.c
 create mode 100644 opal/util/ethtool.h

diff --git a/opal/util/Makefile.am b/opal/util/Makefile.am
index 5c4cb2945e..e75f531332 100644
--- a/opal/util/Makefile.am
+++ b/opal/util/Makefile.am
@@ -43,6 +43,7 @@ headers = \
         cmd_line.h \
         crc.h \
         daemon_init.h \
+        ethtool.h \
         error.h \
         fd.h \
         few.h \
@@ -78,6 +79,7 @@ libopalutil_la_SOURCES = \
         cmd_line.c \
         crc.c \
         daemon_init.c \
+        ethtool.c \
         error.c \
         fd.c \
         few.c \
diff --git a/opal/util/ethtool.c b/opal/util/ethtool.c
new file mode 100644
index 0000000000..91a4a7ed64
--- /dev/null
+++ b/opal/util/ethtool.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016 Karol Mroz. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+
+#include <string.h>
+#include <limits.h>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_NET_IF_H
+#include <net/if.h>
+#endif
+#ifdef HAVE_LINUX_ETHTOOL_H
+#include <linux/ethtool.h>
+#endif
+#ifdef HAVE_SYS_IOCTL_H
+#include <sys/ioctl.h>
+#endif
+#ifdef HAVE_LINUX_SOCKIOS_H
+#include <linux/sockios.h>
+#endif
+
+#include "opal/util/ethtool.h"
+#include "opal/util/if.h"
+
+/*
+ * AC_CHECK_DECLS always defines HAVE_DECL_<symbol>, to 1 or to 0, so those
+ * macros must be tested by value; defined() is true in both cases.
+ */
+#if HAVE_DECL_SIOCETHTOOL && defined(HAVE_STRUCT_IFREQ) && defined(HAVE_STRUCT_ETHTOOL_CMD)
+#define OPAL_ETHTOOL_HAVE_GSET 1
+#else
+#define OPAL_ETHTOOL_HAVE_GSET 0
+#endif
+
+#if OPAL_ETHTOOL_HAVE_GSET && !HAVE_DECL_ETHTOOL_CMD_SPEED
+/*
+ * Fallback used when the system headers do not declare ethtool_cmd_speed():
+ * join the high and low halves of the reported link speed into one value.
+ * The cast keeps the shift in unsigned arithmetic.
+ */
+static inline unsigned int
+ethtool_cmd_speed(const struct ethtool_cmd *ep)
+{
+    return ((unsigned int)ep->speed_hi << 16) | ep->speed;
+}
+#endif
+
+/*
+ * Obtain an appropriate bandwidth for the interface if_name. On Linux, we
+ * get this via an ioctl(). Elsewhere or in the error case, we return the
+ * speed as 0.
+ */
+unsigned int
+opal_ethtool_get_speed (const char *if_name)
+{
+    unsigned int speed = 0;
+
+#if OPAL_ETHTOOL_HAVE_GSET
+    int sockfd;
+    struct ifreq ifr;
+    struct ethtool_cmd edata = {
+        .cmd = ETHTOOL_GSET,
+    };
+
+    if (NULL == if_name) {
+        return speed;
+    }
+
+    sockfd = socket(PF_INET, SOCK_DGRAM, 0);
+    if (sockfd < 0) {
+        /* nothing was opened, so there is nothing to close */
+        return speed;
+    }
+
+    /* ifr is zero-filled first; copying at most sizeof(ifr_name) - 1 bytes
+     * keeps ifr_name NUL-terminated even for an over-long if_name */
+    memset(&ifr, 0, sizeof(struct ifreq));
+    strncpy(ifr.ifr_name, if_name, sizeof(ifr.ifr_name) - 1);
+    ifr.ifr_data = (char *)&edata;
+
+    if (ioctl(sockfd, SIOCETHTOOL, &ifr) >= 0) {
+        speed = ethtool_cmd_speed(&edata);
+        /* an all-ones result is not a usable speed; report 0 instead */
+        if (UINT_MAX == speed) {
+            speed = 0;
+        }
+    }
+
+    close(sockfd);
+#else
+    (void)if_name;
+#endif
+
+    return speed;
+}
diff --git a/opal/util/ethtool.h b/opal/util/ethtool.h
new file mode 100644
index 0000000000..7405a06f8a
--- /dev/null
+++ b/opal/util/ethtool.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2016 Karol Mroz. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef OPAL_ETHTOOL_H
+#define OPAL_ETHTOOL_H
+
+/*
+ * Obtain an appropriate bandwidth for the interface if_name. On Linux, we
+ * get this via an ioctl(). Elsewhere or in the error case, we return the
+ * speed as 0.
+ */ +unsigned int opal_ethtool_get_speed(const char *if_name); + +#endif From b9c6c43c6bf6fafc28571a6a153015e8db9fc936 Mon Sep 17 00:00:00 2001 From: Karol Mroz Date: Wed, 4 May 2016 08:23:05 +0200 Subject: [PATCH 3/4] btl/tcp: add default defines for bandwidth and latency Signed-off-by: Karol Mroz --- opal/mca/btl/tcp/btl_tcp_component.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/opal/mca/btl/tcp/btl_tcp_component.c b/opal/mca/btl/tcp/btl_tcp_component.c index b24cbaf973..94471516f3 100644 --- a/opal/mca/btl/tcp/btl_tcp_component.c +++ b/opal/mca/btl/tcp/btl_tcp_component.c @@ -85,6 +85,9 @@ #include "opal/mca/common/cuda/common_cuda.h" #endif /* OPAL_CUDA_SUPPORT */ +#define MCA_BTL_TCP_BTL_BANDWIDTH 100 +#define MCA_BTL_TCP_BTL_LATENCY 100 + /* * Local functions */ @@ -320,8 +323,8 @@ static int mca_btl_tcp_component_register(void) MCA_BTL_FLAGS_HETEROGENEOUS_RDMA | MCA_BTL_FLAGS_SEND; - mca_btl_tcp_module.super.btl_bandwidth = 100; - mca_btl_tcp_module.super.btl_latency = 100; + mca_btl_tcp_module.super.btl_bandwidth = MCA_BTL_TCP_BTL_BANDWIDTH; + mca_btl_tcp_module.super.btl_latency = MCA_BTL_TCP_BTL_LATENCY; mca_btl_base_param_register(&mca_btl_tcp_component.super.btl_version, &mca_btl_tcp_module.super); From ca6ddf3270ec77772599cc5034ba7591c11f96c1 Mon Sep 17 00:00:00 2001 From: Karol Mroz Date: Wed, 4 May 2016 08:24:23 +0200 Subject: [PATCH 4/4] btl/tcp: autodetect bandwidth and latency if unset Fixes open-mpi/ompi#120 Signed-off-by: Karol Mroz --- opal/mca/btl/tcp/btl_tcp_component.c | 29 ++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/opal/mca/btl/tcp/btl_tcp_component.c b/opal/mca/btl/tcp/btl_tcp_component.c index 94471516f3..aec8ecabf0 100644 --- a/opal/mca/btl/tcp/btl_tcp_component.c +++ b/opal/mca/btl/tcp/btl_tcp_component.c @@ -56,6 +56,7 @@ #include #include "opal/mca/event/event.h" +#include "opal/util/ethtool.h" #include "opal/util/if.h" #include "opal/util/output.h" 
#include "opal/util/argv.h" @@ -323,8 +324,11 @@ static int mca_btl_tcp_component_register(void) MCA_BTL_FLAGS_HETEROGENEOUS_RDMA | MCA_BTL_FLAGS_SEND; - mca_btl_tcp_module.super.btl_bandwidth = MCA_BTL_TCP_BTL_BANDWIDTH; - mca_btl_tcp_module.super.btl_latency = MCA_BTL_TCP_BTL_LATENCY; + /* Bandwidth and latency initially set to 0. May be overridden during + * mca_btl_tcp_create(). + */ + mca_btl_tcp_module.super.btl_bandwidth = 0; + mca_btl_tcp_module.super.btl_latency = 0; mca_btl_base_param_register(&mca_btl_tcp_component.super.btl_version, &mca_btl_tcp_module.super); @@ -516,6 +520,27 @@ static int mca_btl_tcp_create(int if_kindex, const char* if_name) /* allow user to override/specify latency ranking */ sprintf(param, "latency_%s:%d", if_name, i); mca_btl_tcp_param_register_uint(param, NULL, btl->super.btl_latency, OPAL_INFO_LVL_5, &btl->super.btl_latency); + + /* Only attempt to auto-detect bandwidth and/or latency if it is 0. + * + * If detection fails to return anything other than 0, set a default + * bandwidth and latency. + */ + if (0 == btl->super.btl_bandwidth) { + unsigned int speed = opal_ethtool_get_speed(if_name); + btl->super.btl_bandwidth = (speed == 0) ? MCA_BTL_TCP_BTL_BANDWIDTH : speed; + if (i > 0) { + btl->super.btl_bandwidth >>= 1; + } + } + /* We have no runtime btl latency detection mechanism. Just set a default. */ + if (0 == btl->super.btl_latency) { + btl->super.btl_latency = MCA_BTL_TCP_BTL_LATENCY; + if (i > 0) { + btl->super.btl_latency <<= 1; + } + } + #if 0 && OPAL_ENABLE_DEBUG BTL_OUTPUT(("interface %s instance %i: bandwidth %d latency %d\n", if_name, i, btl->super.btl_bandwidth, btl->super.btl_latency));