From 9ef523c1529e7bfaa2712f1e46d67dd2c3c204fc Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 17 Feb 2015 20:43:32 -0800 Subject: [PATCH] Add reachable framework for determining TCP connections --- opal/mca/reachable/Makefile.am | 32 ++ opal/mca/reachable/base/Makefile.am | 17 + opal/mca/reachable/base/base.h | 34 ++ .../mca/reachable/base/reachable_base_frame.c | 51 +++ .../reachable/base/reachable_base_select.c | 47 +++ opal/mca/reachable/netlink/Makefile.am | 44 +++ opal/mca/reachable/netlink/configure.m4 | 178 ++++++++++ opal/mca/reachable/netlink/libnl1_utils.h | 97 ++++++ opal/mca/reachable/netlink/libnl3_utils.h | 80 +++++ opal/mca/reachable/netlink/libnl_utils.h | 62 ++++ .../mca/reachable/netlink/reachable_netlink.h | 27 ++ .../netlink/reachable_netlink_component.c | 93 +++++ .../netlink/reachable_netlink_module.c | 48 +++ .../netlink/reachable_netlink_utils_common.c | 322 ++++++++++++++++++ opal/mca/reachable/reachable.h | 75 ++++ opal/mca/reachable/weighted/.opal_ignore | 0 opal/mca/reachable/weighted/Makefile.am | 34 ++ .../reachable/weighted/reachable_weighted.c | 275 +++++++++++++++ .../reachable/weighted/reachable_weighted.h | 41 +++ .../weighted/reachable_weighted_component.c | 101 ++++++ 20 files changed, 1658 insertions(+) create mode 100644 opal/mca/reachable/Makefile.am create mode 100644 opal/mca/reachable/base/Makefile.am create mode 100644 opal/mca/reachable/base/base.h create mode 100644 opal/mca/reachable/base/reachable_base_frame.c create mode 100644 opal/mca/reachable/base/reachable_base_select.c create mode 100644 opal/mca/reachable/netlink/Makefile.am create mode 100644 opal/mca/reachable/netlink/configure.m4 create mode 100644 opal/mca/reachable/netlink/libnl1_utils.h create mode 100644 opal/mca/reachable/netlink/libnl3_utils.h create mode 100644 opal/mca/reachable/netlink/libnl_utils.h create mode 100644 opal/mca/reachable/netlink/reachable_netlink.h create mode 100644 opal/mca/reachable/netlink/reachable_netlink_component.c 
create mode 100644 opal/mca/reachable/netlink/reachable_netlink_module.c create mode 100644 opal/mca/reachable/netlink/reachable_netlink_utils_common.c create mode 100644 opal/mca/reachable/reachable.h create mode 100644 opal/mca/reachable/weighted/.opal_ignore create mode 100644 opal/mca/reachable/weighted/Makefile.am create mode 100644 opal/mca/reachable/weighted/reachable_weighted.c create mode 100644 opal/mca/reachable/weighted/reachable_weighted.h create mode 100644 opal/mca/reachable/weighted/reachable_weighted_component.c diff --git a/opal/mca/reachable/Makefile.am b/opal/mca/reachable/Makefile.am new file mode 100644 index 0000000000..9fa66a78a8 --- /dev/null +++ b/opal/mca/reachable/Makefile.am @@ -0,0 +1,32 @@ +# +# Copyright (c) 2014 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(LTDLINCL) $(reachable_CPPFLAGS) + +# main library setup +noinst_LTLIBRARIES = libmca_reachable.la +libmca_reachable_la_SOURCES = + +# pkgdata setup +dist_opaldata_DATA = + +# local files +headers = reachable.h +libmca_reachable_la_SOURCES += $(headers) + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +opaldir = $(opalincludedir)/$(subdir) +nobase_opal_HEADERS = $(headers) +endif + +include base/Makefile.am + +distclean-local: + rm -f base/static-components.h diff --git a/opal/mca/reachable/base/Makefile.am b/opal/mca/reachable/base/Makefile.am new file mode 100644 index 0000000000..9214aae681 --- /dev/null +++ b/opal/mca/reachable/base/Makefile.am @@ -0,0 +1,17 @@ +# +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2014 Intel, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers += \ + base/base.h + +libmca_reachable_la_SOURCES += \ + base/reachable_base_frame.c \ + base/reachable_base_select.c diff --git a/opal/mca/reachable/base/base.h b/opal/mca/reachable/base/base.h new file mode 100644 index 0000000000..ed737e7841 --- /dev/null +++ b/opal/mca/reachable/base/base.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2014 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + */ + +#ifndef MCA_REACHABLE_BASE_H +#define MCA_REACHABLE_BASE_H + +#include "opal_config.h" +#include "opal/types.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/mca_base_framework.h" + +#include "opal/mca/reachable/reachable.h" + +BEGIN_C_DECLS + +OPAL_DECLSPEC extern mca_base_framework_t opal_reachable_base_framework; + +/** + * Select a reachable module + */ +OPAL_DECLSPEC int opal_reachable_base_select(void); + +END_C_DECLS + +#endif diff --git a/opal/mca/reachable/base/reachable_base_frame.c b/opal/mca/reachable/base/reachable_base_frame.c new file mode 100644 index 0000000000..b4caee8103 --- /dev/null +++ b/opal/mca/reachable/base/reachable_base_frame.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2014 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "opal_config.h" +#include "opal/constants.h" + +#include "opal/mca/mca.h" +#include "opal/util/output.h" +#include "opal/mca/base/base.h" + +#include "opal/mca/reachable/base/base.h" + + +/* + * The following file was created by configure. It contains extern + * components and the definition of an array of pointers to each + * module's public mca_base_module_t struct. 
+ */ + +#include "opal/mca/reachable/base/static-components.h" + +opal_reachable_base_module_t opal_reachable; + +static int opal_reachable_base_frame_register(mca_base_register_flag_t flags) +{ + return OPAL_SUCCESS; +} + +static int opal_reachable_base_frame_close(void) +{ + return mca_base_framework_components_close(&opal_reachable_base_framework, NULL); +} + +static int opal_reachable_base_frame_open(mca_base_open_flag_t flags) +{ + /* Open up all available components */ + return mca_base_framework_components_open(&opal_reachable_base_framework, flags); +} + +MCA_BASE_FRAMEWORK_DECLARE(opal, reachable, "OPAL Reachability Framework", + opal_reachable_base_frame_register, + opal_reachable_base_frame_open, + opal_reachable_base_frame_close, + mca_reachable_base_static_components, 0); diff --git a/opal/mca/reachable/base/reachable_base_select.c b/opal/mca/reachable/base/reachable_base_select.c new file mode 100644 index 0000000000..a1e0080765 --- /dev/null +++ b/opal/mca/reachable/base/reachable_base_select.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2014 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/reachable/reachable.h" +#include "opal/mca/reachable/base/base.h" + +/* + * Globals + */ + +int opal_reachable_base_select(void) +{ + int ret; + opal_reachable_base_component_t *best_component = NULL; + opal_reachable_base_module_t *best_module = NULL; + + /* + * Select the best component + */ + if( OPAL_SUCCESS != mca_base_select("reachable", opal_reachable_base_framework.framework_output, + &opal_reachable_base_framework.framework_components, + (mca_base_module_t **) &best_module, + (mca_base_component_t **) &best_component) ) { + /* notify caller that no available component found */ + return OPAL_ERR_NOT_FOUND; + } + + /* Save the winner */ + opal_reachable = *best_module; + + /* Initialize the winner */ + ret = opal_reachable.init(); + + return ret; +} diff --git a/opal/mca/reachable/netlink/Makefile.am b/opal/mca/reachable/netlink/Makefile.am new file mode 100644 index 0000000000..19a411e5ab --- /dev/null +++ b/opal/mca/reachable/netlink/Makefile.am @@ -0,0 +1,44 @@ +# +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + reachable_netlink.h \ + reachable_netlink_component.c \ + reachable_netlink_module.c \ + libnl1_utils.h \ + libnl3_utils.h \ + libnl_utils.h \ + reachable_netlink_utils_common.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if MCA_BUILD_opal_reachable_netlink_DSO +component_noinst = +component_install = mca_reachable_netlink.la +else +component_noinst = libmca_reachable_netlink.la +component_install = +endif + +AM_CPPFLAGS = \ + $(opal_reachable_netlink_LIBNL_CPPFLAGS) \ + -DHAVE_LIBNL3=$(HAVE_LIBNL3) + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_reachable_netlink_la_SOURCES = $(sources) +mca_reachable_netlink_la_LDFLAGS = -module -avoid-version +mca_reachable_netlink_la_LIBADD = $(opal_reachable_netlink_LIBNL_LIBS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_reachable_netlink_la_SOURCES =$(sources) +libmca_reachable_netlink_la_LDFLAGS = -module -avoid-version +libmca_reachable_netlink_la_LIBADD = $(opal_reachable_netlink_LIBNL_LIBS) diff --git a/opal/mca/reachable/netlink/configure.m4 b/opal/mca/reachable/netlink/configure.m4 new file mode 100644 index 0000000000..8f28a1ef68 --- /dev/null +++ b/opal/mca/reachable/netlink/configure.m4 @@ -0,0 +1,178 @@ +# -*- shell-script -*- +# +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +dnl +dnl Portions of this software copied from libfabric +dnl (https://github.com/ofiwg/libfabric) +dnl + +dnl BSD license +dnl +dnl Redistribution and use in source and binary forms, with or without +dnl modification, are permitted provided that the following conditions +dnl are met: +dnl +dnl * Redistributions of source code must retain the above copyright +dnl notice, this list of conditions and the following disclaimer. +dnl +dnl * Redistributions in binary form must reproduce the above +dnl copyright notice, this list of conditions and the following +dnl disclaimer in the documentation and/or other materials provided +dnl with the distribution. 
+dnl +dnl THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +dnl "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +dnl LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +dnl FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +dnl COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +dnl INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +dnl BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +dnl LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +dnl CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +dnl LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +dnl ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +dnl POSSIBILITY OF SUCH DAMAGE. + + +dnl Check for libnl; prefer version 3 instead of version 1. If $3 is 1, +dnl abort (i.e., AC_MSG_ERROR) if neither libnl v1 nor v3 can be found. +dnl +dnl Outputs: +dnl +dnl - Set $1 to the CPPFLAGS necessary to compile with libnl +dnl - Set $2 to the LIBS necessary to link with libnl +dnl - If $3 is 1, AC_MSG_ERROR (i.e., abort) if neither libnl or +dnl libnl3 can be found +dnl - Set HAVE_LIBNL3 to 1 if libnl3 will be used; 0 if libnl1 will be used +dnl - AC_SUBST $HAVE_LIBNL3 +dnl - AC_DEFINE HAVE_LIBNL3 +dnl +dnl -------------------------------------------------------- +AC_DEFUN([OPAL_REACHABLE_NETLINK_CHECK_LIBNL3],[ + # More libnl v1/v3 sadness: the two versions are not compatible + # and will not work correctly if simultaneously linked into the + # same applications. Unfortunately, they *will* link into the + # same image! On platforms like SLES 12, libibverbs depends on + # libnl-3.so.200 and friends, while a naive implementation of + # our configure logic would link libnl.so.1 to libdaplusnic, + # resulting in both versions in the dependency map at the same + # time. As a coarse fix, just check for libnl-3 first and use + # it if present on the system.
+ + # GROSS: libnl wants us to either use pkg-config (which we + # can't assume is always present) or we need to look in a + # particular directory for the right libnl3 include files. For + # now, just hard code the special path into this logic. + + save_CPPFLAGS=$CPPFLAGS + save_LIBS=$LIBS + + $1="-I/usr/include/libnl3" + CPPFLAGS="$$1 $CPPFLAGS" + AC_MSG_CHECKING([for /usr/include/libnl3]) + AS_IF([test -d "/usr/include/libnl3"], + [AC_MSG_RESULT([present]) + AC_CHECK_HEADER( + [netlink/version.h], + [AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[ +#include +#include +#ifndef LIBNL_VER_MAJ +#error "LIBNL_VER_MAJ not defined!" +#endif +/* to the best of our knowledge, version.h only exists in libnl3 */ +#if LIBNL_VER_MAJ < 3 +#error "LIBNL_VER_MAJ < 3, this is very unusual" +#endif + ]],[[/* empty body */]])], + [HAVE_LIBNL3=1], dnl our program compiled + [HAVE_LIBNL3=0])], dnl our program failed to compile + [HAVE_LIBNL3=0], dnl AC_CHECK_HEADER failed + [#include + ])], + [AC_MSG_RESULT([missing]) + HAVE_LIBNL3=0]) dnl "/usr/include/libnl3" does not exist + + # nl_recvmsgs_report is a symbol that is only present in v3 + AS_IF([test "$HAVE_LIBNL3" -eq 1], + [AC_SEARCH_LIBS([nl_recvmsgs_report], [nl-3], + [# We also need libnl-route-3 + AC_SEARCH_LIBS([nl_rtgen_request], [nl-route-3], + [$2="-lnl-3 -lnl-route-3" + HAVE_LIBNL3=1], + [HAVE_LIBNL3=0])], + [HAVE_LIBNL3=0])]) + + AS_IF([test "$HAVE_LIBNL3" -eq 1], + [AC_MSG_NOTICE([using libnl-3])], + [# restore $1 since we are falling back to libnl (v1) + $1="" + AC_SEARCH_LIBS([nl_connect], [nl], + [$2="-lnl" + AC_MSG_NOTICE([using libnl (v1)])], + [AC_MSG_WARN([Cannot find libnl-3 nor libnl]) + AS_IF([test "$3" = "1"], + [AC_MSG_ERROR([Cannot continue])]) + ])]) + + # libnl_utils.h does not include configure-generated config.h, + # so it may not see the HAVE_LIBNL3 #define.
Hence, we set + # HAVE_LIBNL3 as both a C preprocessor macro (in case some + # other file includes config.h before libnl_utils.h) and a + # Makefile macro (so that the app can set HAVE_LIBNL3 via + # CPPFLAGS). Also, this macro may be used in multiple + # different libraries; setting HAVE_LIBNL3 both ways lets the + # application choose which way to set it. + AC_SUBST([HAVE_LIBNL3]) + AC_DEFINE_UNQUOTED([HAVE_LIBNL3],[$HAVE_LIBNL3], + [set to 1 if should use libnl v3, set to 0 for libnl v1]) + + LIBS=$save_LIBS + AS_UNSET([save_LIBS]) + CPPFLAGS=$save_CPPFLAGS + AS_UNSET([save_CPPFLAGS]) +]) + +dnl ============================================================== + +# MCA_opal_reachable_netlink_CONFIG([action-if-can-compile], +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_reachable_netlink_CONFIG],[ + AC_CONFIG_FILES([opal/mca/reachable/netlink/Makefile]) + + OPAL_VAR_SCOPE_PUSH([opal_reachable_netlink_happy]) + + opal_reachable_netlink_happy=1 + AC_CHECK_HEADER([linux/netlink.h], [], + [opal_reachable_netlink_happy=0], [ +#include +#include +]) + + AS_IF([test $opal_reachable_netlink_happy -eq 1], + [OPAL_REACHABLE_NETLINK_CHECK_LIBNL3( + [opal_reachable_netlink_LIBNL_CPPFLAGS], + [opal_reachable_netlink_LIBNL_LIBS], + [0]) + ]) + AS_IF([test -z "$opal_reachable_netlink_LIBNL_LIBS"], + [opal_reachable_netlink_happy=0]) + + AC_SUBST(opal_reachable_netlink_LIBNL_CPPFLAGS) + AC_SUBST(opal_reachable_netlink_LIBNL_LIBS) + + AS_IF([test $opal_reachable_netlink_happy -eq 1], + [$1], + [$2]) + + OPAL_VAR_SCOPE_POP() +]) diff --git a/opal/mca/reachable/netlink/libnl1_utils.h b/opal/mca/reachable/netlink/libnl1_utils.h new file mode 100644 index 0000000000..6665c58711 --- /dev/null +++ b/opal/mca/reachable/netlink/libnl1_utils.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
+ * + * Portions of this software copied from libfabric + * (https://github.com/ofiwg/libfabric) + * + * LICENSE_BEGIN + * + * BSD license: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * LICENSE_END + * + * + */ + +#ifndef LIBNL1_UTILS_H +#define LIBNL1_UTILS_H + +#include +#include +#include +#include +#include + +typedef struct nl_handle NL_HANDLE; + +#define NLMSG_SIZE(size) nlmsg_msg_size(size) +#define NL_GETERROR(err) nl_geterror() +#define NL_HANDLE_ALLOC nl_handle_alloc +#define NL_HANDLE_FREE nl_handle_destroy +#define NL_DISABLE_SEQ_CHECK nl_disable_sequence_check +#define INC_CB_MSGCNT(arg) \ + do { \ + arg->msg_cnt++; \ + } while (0) + +/* + * the return value of nl_recvmsgs_default does not tell + * whether it returns because of successful read or socket + * timeout. This is a limitation in libnl1. So we compare + * message count before and after the call to decide if there + * is no new message arriving. In this case, this function + * needs to terminate to prevent the caller from + * blocking forever. + * NL_CB_MSG_IN traps every received message, so + * there should be no premature exit + */ +#define NL_RECVMSGS(nlh, cb_arg, rc, err, out) \ + do { \ + int msg_cnt = cb_arg.msg_cnt; \ + err = nl_recvmsgs_default(nlh); \ + if (err < 0) { \ + opal_output(0, "Failed to receive netlink reply message, error %s\n", \ + NL_GETERROR(err)); \ + goto out; \ + } \ + if (msg_cnt == cb_arg.msg_cnt) {\ + err = rc; \ + goto out; \ + } \ + } while (0) + +struct usnic_rt_cb_arg { + uint32_t nh_addr; + int oif; + int found; + int msg_cnt; + struct usnic_nl_sk *unlsk; +}; + +#endif /* LIBNL1_UTILS_H */ diff --git a/opal/mca/reachable/netlink/libnl3_utils.h b/opal/mca/reachable/netlink/libnl3_utils.h new file mode 100644 index 0000000000..ea99c88fc1 --- /dev/null +++ b/opal/mca/reachable/netlink/libnl3_utils.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. 
+ * + * Portions of this software copied from libfabric + * (https://github.com/ofiwg/libfabric) + * + * LICENSE_BEGIN + * + * BSD license: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * LICENSE_END + */ + +#ifndef LIBNL3_UTILS_H +#define LIBNL3_UTILS_H + +#include +#include +#include +#include +#include + +typedef struct nl_sock NL_HANDLE; + +#define NLMSG_SIZE(size) nlmsg_size(size) +#define NL_GETERROR(err) nl_geterror(err) +#define NL_HANDLE_ALLOC nl_socket_alloc +#define NL_HANDLE_FREE nl_socket_free +#define NL_DISABLE_SEQ_CHECK nl_socket_disable_seq_check +#define INC_CB_MSGCNT(arg) + +/* err will be returned as -NLE_AGAIN */ +/* if the socket times out */ +#define NL_RECVMSGS(nlh, cb_arg, rc, err, out) \ + do { \ + err = nl_recvmsgs_default(nlh); \ + if (err < 0) { \ + opal_output(0, "Failed to receive netlink reply message, error %s\n", \ + NL_GETERROR(err)); \ + if (err == -NLE_AGAIN) \ + err = rc; \ + goto out; \ + } \ + } while (0) + +struct usnic_rt_cb_arg { + uint32_t nh_addr; + int oif; + int found; + int replied; + struct usnic_nl_sk *unlsk; +}; + +#endif /* LIBNL3_UTILS_H */ diff --git a/opal/mca/reachable/netlink/libnl_utils.h b/opal/mca/reachable/netlink/libnl_utils.h new file mode 100644 index 0000000000..379f8546c2 --- /dev/null +++ b/opal/mca/reachable/netlink/libnl_utils.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2014-2015, Cisco Systems, Inc. All rights reserved. + * + * Portions of this software copied from libfabric + * (https://github.com/ofiwg/libfabric) + * + * LICENSE_BEGIN + * + * BSD license: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * LICENSE_END + * + * + */ + +#ifndef LIBNL_UTILS_H +#define LIBNL_UTILS_H + +#if !defined (HAVE_LIBNL3) +#error You must define HAVE_LIBNL3 to 0 or 1 before including libnl_utils.h +#elif HAVE_LIBNL3 +#include "libnl3_utils.h" +#else +#include "libnl1_utils.h" +#endif + +struct usnic_nl_sk { + NL_HANDLE *nlh; + uint32_t seq; +}; + +int opal_reachable_netlink_nl_rt_lookup(uint32_t src_addr, + uint32_t dst_addr, int oif, + uint32_t *nh_addr); + +#endif /* LIBNL_UTILS_H */ diff --git a/opal/mca/reachable/netlink/reachable_netlink.h b/opal/mca/reachable/netlink/reachable_netlink.h new file mode 100644 index 0000000000..3581d89a31 --- /dev/null +++ b/opal/mca/reachable/netlink/reachable_netlink.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_REACHABLE_NETLINK_H +#define MCA_REACHABLE_NETLINK_H + +#include "opal_config.h" + +#include "opal/mca/reachable/reachable.h" + +BEGIN_C_DECLS + +OPAL_DECLSPEC extern opal_reachable_base_component_t + mca_reachable_netlink_component; + +OPAL_DECLSPEC extern const opal_reachable_base_module_t + opal_reachable_netlink_module; + +END_C_DECLS + +#endif /* MCA_REACHABLE_NETLINK_H */ diff --git a/opal/mca/reachable/netlink/reachable_netlink_component.c b/opal/mca/reachable/netlink/reachable_netlink_component.c new file mode 100644 index 0000000000..f951072475 --- /dev/null +++ b/opal/mca/reachable/netlink/reachable_netlink_component.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/util/proc.h" +#include "opal/mca/reachable/reachable.h" +#include "reachable_netlink.h" + +/* + * Public string showing the reachable netlink component version number + */ +const char *opal_reachable_netlink_component_version_string = + "OPAL netlink reachable MCA component version " OPAL_VERSION; + +/* + * Local function + */ +static int reachable_netlink_open(void); +static int reachable_netlink_close(void); +static int reachable_netlink_component_query(mca_base_module_t **module, int *priority); +static int component_register(void); + + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ + +opal_reachable_base_component_t mca_reachable_netlink_component = { + + /* First, the mca_component_t struct containing meta information + about the component itself */ + + { + /* Indicate that we are a reachable v2.0.0 component (which also + implies a specific MCA version) */ + + OPAL_REACHABLE_BASE_VERSION_2_0_0, + + /* Component name and version */ + +
"netlink", + OPAL_MAJOR_VERSION, + OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION, + + /* Component open, close, query, and register functions */ + + reachable_netlink_open, + reachable_netlink_close, + reachable_netlink_component_query, + component_register + }, + /* Next the MCA v1.0.0 component meta data */ + { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + } +}; + +static int reachable_netlink_open(void) +{ + /* construct the component fields */ + + return OPAL_SUCCESS; +} + +static int reachable_netlink_close(void) +{ + return OPAL_SUCCESS; +} + +static int component_register(void) +{ + return OPAL_SUCCESS; +} + +static int +reachable_netlink_component_query(mca_base_module_t **module, int *priority) +{ + *priority = 50; + *module = (mca_base_module_t *) &opal_reachable_netlink_module; + return OPAL_SUCCESS; +} diff --git a/opal/mca/reachable/netlink/reachable_netlink_module.c b/opal/mca/reachable/netlink/reachable_netlink_module.c new file mode 100644 index 0000000000..60c8e075ae --- /dev/null +++ b/opal/mca/reachable/netlink/reachable_netlink_module.c @@ -0,0 +1,48 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/constants.h" +#include "opal/types.h" + +#include "opal/mca/reachable/base/base.h" +#include "reachable_netlink.h" + +/* Local variables */ +static int init_counter = 0; + + +static int netlink_init(void) +{ + ++init_counter; + + return OPAL_SUCCESS; +} + +static int netlink_fini(void) +{ + --init_counter; + + return OPAL_SUCCESS; +} + +static opal_if_t* netlink_reachable(opal_list_t *local_if, + opal_list_t *remote_if) +{ + /* JMS Fill me in */ + return NULL; +} + +const opal_reachable_base_module_t opal_reachable_netlink_module = { + netlink_init, + netlink_fini, + netlink_reachable +}; diff --git a/opal/mca/reachable/netlink/reachable_netlink_utils_common.c b/opal/mca/reachable/netlink/reachable_netlink_utils_common.c new file mode 100644 index 0000000000..81abe44e20 --- /dev/null +++ b/opal/mca/reachable/netlink/reachable_netlink_utils_common.c @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. + * + * Portions of this software copied from libfabric + * (https://github.com/ofiwg/libfabric) + * + * LICENSE_BEGIN + * + * BSD license: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * LICENSE_END + * + * + */ + +#include "opal_config.h" + +#include +#include +#include + +#include "libnl_utils.h" + +/* Adapt this copied code for Open MPI */ +#include "opal/util/output.h" + + +static struct nla_policy route_policy[RTA_MAX+1] = { + [RTA_IIF] = { .type = NLA_STRING, + .maxlen = IFNAMSIZ, }, + [RTA_OIF] = { .type = NLA_U32 }, + [RTA_PRIORITY] = { .type = NLA_U32 }, + [RTA_FLOW] = { .type = NLA_U32 }, + [RTA_MP_ALGO] = { .type = NLA_U32 }, + [RTA_CACHEINFO] = { .minlen = sizeof(struct rta_cacheinfo) }, + [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_MULTIPATH] = { .type = NLA_NESTED }, +}; + +static int usnic_is_nlreply_expected(struct usnic_nl_sk *unlsk, + struct nlmsghdr *nlm_hdr) +{ +#if OPAL_ENABLE_DEBUG + if (nlm_hdr->nlmsg_pid != nl_socket_get_local_port(unlsk->nlh) + || nlm_hdr->nlmsg_seq != unlsk->seq) { + opal_output(0, "Not an expected reply msg pid: %u local pid: %u msg seq: %u expected seq: %u\n", + nlm_hdr->nlmsg_pid, + nl_socket_get_local_port(unlsk->nlh), + nlm_hdr->nlmsg_seq, unlsk->seq); + return 0; + } +#endif + + return 1; +} + +static int usnic_is_nlreply_err(struct nlmsghdr *nlm_hdr) +{ + if (nlm_hdr->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *e = (struct nlmsgerr *)nlmsg_data(nlm_hdr); + if (nlm_hdr->nlmsg_len >= (__u32)NLMSG_SIZE(sizeof(*e))) + opal_output(0, + "Received a netlink error message"); + else + opal_output(0, + "Received a truncated netlink error message\n"); +
return 1; + } + + return 0; +} + +static int usnic_nl_send_query(struct usnic_nl_sk *unlsk, + struct nl_msg *msg, + int protocol, int flag) +{ + struct nlmsghdr *nlhdr; + + nlhdr = nlmsg_hdr(msg); + nlhdr->nlmsg_pid = nl_socket_get_local_port(unlsk->nlh); + nlhdr->nlmsg_seq = ++unlsk->seq; + nlmsg_set_proto(msg, protocol); + nlhdr->nlmsg_flags = flag; + + return nl_send(unlsk->nlh, msg); +} + +static int usnic_nl_set_rcvsk_timer(NL_HANDLE *nlh) +{ + int err = 0; + struct timeval timeout; + + timeout.tv_sec = 1; /* one-second receive timeout */ + timeout.tv_usec = 0; + + err = setsockopt(nl_socket_get_fd(nlh), SOL_SOCKET, SO_RCVTIMEO, + (char *)&timeout, sizeof(timeout)); +#if OPAL_ENABLE_DEBUG + if (err < 0) + opal_output(0, "Failed to set SO_RCVTIMEO for nl socket"); +#endif + + return err; +} + +static int usnic_nl_sk_alloc(struct usnic_nl_sk **p_sk, int protocol) +{ + struct usnic_nl_sk *unlsk; + NL_HANDLE *nlh; + int err; + + unlsk = calloc(1, sizeof(*unlsk)); + if (!unlsk) { + opal_output(0, "Failed to allocate usnic_nl_sk struct\n"); + return ENOMEM; + } + + nlh = NL_HANDLE_ALLOC(); + if (!nlh) { + opal_output(0, "Failed to allocate nl handle\n"); + err = ENOMEM; + goto err_free_unlsk; + } + + err = nl_connect(nlh, protocol); + if (err < 0) { + opal_output(0, "Failed to connect netlink route socket error: %s\n", + NL_GETERROR(err)); + err = EINVAL; + goto err_free_nlh; + } + + NL_DISABLE_SEQ_CHECK(nlh); + err = usnic_nl_set_rcvsk_timer(nlh); + if (err < 0) + goto err_close_nlh; + + unlsk->nlh = nlh; + unlsk->seq = time(NULL); /* seed the request sequence number from the clock */ + *p_sk = unlsk; + return 0; + + err_close_nlh: + nl_close(nlh); + err_free_nlh: + NL_HANDLE_FREE(nlh); + err_free_unlsk: + free(unlsk); + return err; +} + +static void usnic_nl_sk_free(struct usnic_nl_sk *unlsk) +{ + nl_close(unlsk->nlh); + NL_HANDLE_FREE(unlsk->nlh); + free(unlsk); +} + +static int usnic_rt_raw_parse_cb(struct nl_msg *msg, void *arg) +{ + struct usnic_rt_cb_arg *lookup_arg = (struct usnic_rt_cb_arg *)arg; + struct usnic_nl_sk *unlsk =
lookup_arg->unlsk; + struct nlmsghdr *nlm_hdr = nlmsg_hdr(msg); + struct rtmsg *rtm; + struct nlattr *tb[RTA_MAX + 1]; + int found = 0; + int err; + + INC_CB_MSGCNT(lookup_arg); /* counts messages with libnl1; expands to nothing with libnl3 */ + + if (!usnic_is_nlreply_expected(unlsk, nlm_hdr)) { +#if OPAL_ENABLE_DEBUG + nl_msg_dump(msg, stderr); +#endif + return NL_SKIP; + } + + if (usnic_is_nlreply_err(nlm_hdr)) { +#if OPAL_ENABLE_DEBUG + nl_msg_dump(msg, stderr); +#endif + return NL_SKIP; + } + + if (nlm_hdr->nlmsg_type != RTM_NEWROUTE) { +#if OPAL_ENABLE_DEBUG + char buf[128]; + nl_nlmsgtype2str(nlm_hdr->nlmsg_type, buf, sizeof(buf)); + opal_output(0, "Received an invalid route request reply message type: %s\n", + buf); + nl_msg_dump(msg, stderr); +#endif + return NL_SKIP; + } + + rtm = nlmsg_data(nlm_hdr); + if (rtm->rtm_family != AF_INET) { +#if OPAL_ENABLE_DEBUG + opal_output(0, "RTM message contains invalid AF family: %u\n", + rtm->rtm_family); + nl_msg_dump(msg, stderr); +#endif + return NL_SKIP; + } + + err = nlmsg_parse(nlm_hdr, sizeof(struct rtmsg), tb, RTA_MAX, + route_policy); + if (err < 0) { +#if OPAL_ENABLE_DEBUG + opal_output(0, "nlmsg parse error %s\n", NL_GETERROR(err)); + nl_msg_dump(msg, stderr); +#endif + return NL_SKIP; + } + + if (tb[RTA_OIF]) { + if (nla_get_u32(tb[RTA_OIF]) == (uint32_t)lookup_arg->oif) + found = 1; + else + opal_output(0, "Retrieved route has a different outgoing interface %u (expected %d)\n", + nla_get_u32(tb[RTA_OIF]), + lookup_arg->oif); + } + + if (found && tb[RTA_GATEWAY]) + lookup_arg->nh_addr = nla_get_u32(tb[RTA_GATEWAY]); + + lookup_arg->found = found; + return NL_STOP; +} + +int opal_reachable_netlink_nl_rt_lookup(uint32_t src_addr, + uint32_t dst_addr, int oif, + uint32_t *nh_addr) +{ + struct usnic_nl_sk *unlsk; + struct nl_msg *nlm; + struct rtmsg rmsg; + struct usnic_rt_cb_arg arg; + int err; + + unlsk = NULL; + err = usnic_nl_sk_alloc(&unlsk, NETLINK_ROUTE); + if (err) + return err; + + memset(&rmsg, 0, sizeof(rmsg)); + rmsg.rtm_family = AF_INET; + rmsg.rtm_dst_len =
sizeof(dst_addr) * CHAR_BIT; + rmsg.rtm_src_len = sizeof(src_addr) * CHAR_BIT; + + nlm = nlmsg_alloc_simple(RTM_GETROUTE, 0); + if (!nlm) { + /* err is still 0 here, so there is no libnl error code to report */ + opal_output(0, "Failed to alloc nl message\n"); + err = ENOMEM; + goto out; + } + nlmsg_append(nlm, &rmsg, sizeof(rmsg), NLMSG_ALIGNTO); + nla_put_u32(nlm, RTA_DST, dst_addr); + nla_put_u32(nlm, RTA_SRC, src_addr); + + err = usnic_nl_send_query(unlsk, nlm, NETLINK_ROUTE, NLM_F_REQUEST); + nlmsg_free(nlm); + if (err < 0) { + opal_output(0, "Failed to send RTM_GETROUTE query message, error %s\n", + NL_GETERROR(err)); + err = EINVAL; + goto out; + } + + memset(&arg, 0, sizeof(arg)); + arg.oif = oif; + arg.unlsk = unlsk; + err = nl_socket_modify_cb(unlsk->nlh, NL_CB_MSG_IN, NL_CB_CUSTOM, + usnic_rt_raw_parse_cb, &arg); + if (err != 0) { + opal_output(0, "Failed to setup callback function, error %s\n", + NL_GETERROR(err)); + err = EINVAL; + goto out; + } + + NL_RECVMSGS(unlsk->nlh, arg, EHOSTUNREACH, err, out); + + if (arg.found) { + *nh_addr = arg.nh_addr; + err = 0; + } else { + err = EHOSTUNREACH; + } + + out: + usnic_nl_sk_free(unlsk); + return err; +} diff --git a/opal/mca/reachable/reachable.h b/opal/mca/reachable/reachable.h new file mode 100644 index 0000000000..db77271786 --- /dev/null +++ b/opal/mca/reachable/reachable.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_REACHABLE_H +#define OPAL_REACHABLE_H + +#include "opal_config.h" +#include "opal/types.h" + +#include "opal/mca/mca.h" +#include "opal/mca/if/if.h" + +BEGIN_C_DECLS + + +/* Init */ +typedef int (*opal_reachable_base_module_init_fn_t)(void); + +/* Finalize */ +typedef int (*opal_reachable_base_module_fini_fn_t)(void); + +/* Given a list of local interfaces and a list of remote + * interfaces, return the interface that is the "best" + * for connecting to the remote process.
+ *
+ * local_if: list of local opal_if_t interfaces
+ * remote_if: list of opal_if_t interfaces for the remote
+ * process
+ *
+ * return value: pointer to opal_if_t on local_if that is
+ * the "best" option for connecting. NULL
+ * indicates that the remote process cannot
+ * be reached on any interface
+ */
+typedef opal_if_t*
+(*opal_reachable_base_module_reachable_fn_t)(opal_list_t *local_if,
+                                             opal_list_t *remote_if);
+
+
+/*
+ * the standard public API data structure: one function pointer per
+ * operation a reachable module provides
+ */
+typedef struct {
+    /* currently used APIs */
+    opal_reachable_base_module_init_fn_t init;
+    opal_reachable_base_module_fini_fn_t finalize;
+    opal_reachable_base_module_reachable_fn_t reachable;
+} opal_reachable_base_module_t;
+/* Component descriptor for this framework: the MCA base version and data blocks plus an integer priority. */
+typedef struct {
+    mca_base_component_t base_version;
+    mca_base_component_data_t base_data;
+    int priority;
+} opal_reachable_base_component_t;
+
+/*
+ * Macro for use in components that are of type reachable: expands to the
+ * MCA base version followed by the framework name and its 2.0.0 version
+ */
+#define OPAL_REACHABLE_BASE_VERSION_2_0_0 \
+    MCA_BASE_VERSION_2_0_0, \
+    "reachable", 2, 0, 0
+
+/* Global structure for accessing reachability functions */
+OPAL_DECLSPEC extern opal_reachable_base_module_t opal_reachable;
+
+
+END_C_DECLS
+
+#endif
diff --git a/opal/mca/reachable/weighted/.opal_ignore b/opal/mca/reachable/weighted/.opal_ignore
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/opal/mca/reachable/weighted/Makefile.am b/opal/mca/reachable/weighted/Makefile.am
new file mode 100644
index 0000000000..667f48723b
--- /dev/null
+++ b/opal/mca/reachable/weighted/Makefile.am
@@ -0,0 +1,34 @@
+#
+# Copyright (c) 2014 Intel, Inc. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+sources = \
+        reachable_weighted.h \
+        reachable_weighted_component.c \
+        reachable_weighted.c
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+# Exactly one of component_install / component_noinst names a library, depending on whether this component is built as a DSO.
+if MCA_BUILD_opal_reachable_weighted_DSO
+component_noinst =
+component_install = mca_reachable_weighted.la
+else
+component_noinst = libmca_reachable_weighted.la
+component_install =
+endif
+# DSO flavour: built from $(sources) and placed in $(opallibdir).
+mcacomponentdir = $(opallibdir)
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_reachable_weighted_la_SOURCES = $(sources)
+mca_reachable_weighted_la_LDFLAGS = -module -avoid-version
+# Static flavour: same sources, listed under noinst_LTLIBRARIES.
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_reachable_weighted_la_SOURCES =$(sources)
+libmca_reachable_weighted_la_LDFLAGS = -module -avoid-version
diff --git a/opal/mca/reachable/weighted/reachable_weighted.c b/opal/mca/reachable/weighted/reachable_weighted.c
new file mode 100644
index 0000000000..ccc41bb58b
--- /dev/null
+++ b/opal/mca/reachable/weighted/reachable_weighted.c
@@ -0,0 +1,275 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
+ * Copyright (c) 2014 Mellanox Technologies, Inc.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+#include "opal/constants.h"
+#include "opal/types.h"
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "opal/mca/if/if.h"
+
+#include "opal/mca/reachable/base/base.h"
+#include "reachable_weighted.h"
+
+static int weighted_init(void);
+static int weighted_fini(void);
+static opal_if_t* weighted_reachable(opal_list_t *local_if,
+                                     opal_list_t *remote_if);
+
+/*
+ * describes the quality of a possible connection between a local and
+ * a remote network interface; larger values are preferred
+ */
+enum connection_quality {
+    CQ_NO_CONNECTION,
+    CQ_PRIVATE_DIFFERENT_NETWORK,
+    CQ_PRIVATE_SAME_NETWORK,
+    CQ_PUBLIC_DIFFERENT_NETWORK,
+    CQ_PUBLIC_SAME_NETWORK
+};
+
+/* Module function table, in the field order of opal_reachable_base_module_t: init, finalize, reachable. */
+const opal_reachable_base_module_t opal_reachable_weighted_module = {
+    weighted_init,
+    weighted_fini,
+    weighted_reachable
+};
+
+// local variables: init_cntr counts weighted_init() calls not yet matched by weighted_fini()
+static int init_cntr = 0;
+/* Module init hook: bumps the init counter; always returns OPAL_SUCCESS. */
+static int weighted_init(void)
+{
+    ++init_cntr;
+
+    return OPAL_SUCCESS;
+}
+/* Module finalize hook: drops the init counter; always returns OPAL_SUCCESS. */
+static int weighted_fini(void)
+{
+    --init_cntr;
+
+    return OPAL_SUCCESS;
+}
+/* Weigh every local/remote interface pair by connection_quality and pick the best unused pairing. NOTE(review): unfinished - see the note at the top of the body. */
+static opal_if_t* weighted_reachable(opal_list_t *local_if,
+                                     opal_list_t *remote_if)
+{
+    size_t perm_size, num_local_interfaces, num_peer_interfaces;
+    enum connection_quality **weights;
+    /* NOTE(review): i, j, rc, a, best_addr, best_assignment, local_interfaces, peer_interfaces, proc_hostname, btl_endpoint and the mca_btl_tcp_* types are used below but not declared in this function, and no opal_if_t is ever selected for return. */
+    /*
+     * assign weights to each possible pair of interfaces
+     */
+    num_local_interfaces = opal_list_get_size(local_if);
+    num_peer_interfaces = opal_list_get_size(remote_if);
+
+    perm_size = num_local_interfaces; /* perm_size = max(local, peer): the tables below are square */
+    if (num_peer_interfaces > perm_size) {
+        perm_size = num_peer_interfaces;
+    }
+
+    weights = (enum connection_quality**)malloc(perm_size * sizeof(enum connection_quality*));
+
+    best_addr = (mca_btl_tcp_addr_t ***) malloc(perm_size
+                                                * sizeof(mca_btl_tcp_addr_t **));
+    for(i = 0; i < perm_size; ++i) {
+        weights[i] = (enum connection_quality*) malloc(perm_size * sizeof(enum connection_quality));
+        memset(weights[i], 0, perm_size * sizeof(enum connection_quality)); /* 0 == CQ_NO_CONNECTION */
+
+        best_addr[i] = (mca_btl_tcp_addr_t **) malloc(perm_size * sizeof(mca_btl_tcp_addr_t *));
+        memset(best_addr[i], 0, perm_size * sizeof(mca_btl_tcp_addr_t *));
+    }
+
+    for(i=0; i<num_local_interfaces; ++i) {
+        for(j=0; j<num_peer_interfaces; ++j) {
+
+            /* check state of ipv4 address pair */
+            if(NULL != local_interfaces[i]->ipv4_address &&
+               NULL != peer_interfaces[j]->ipv4_address) {
+
+                /* check for loopback */
+                if ((opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address)
+                     && !opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv4_address))
+                    || (opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv4_address)
+                        && !opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address))
+                    || (opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address)
+                        && !opal_ifislocal(proc_hostname))) {
+
+                    /* No connection is possible on these interfaces */
+
+                    /* check for RFC1918 */
+                } else if(opal_net_addr_isipv4public((struct sockaddr*) local_interfaces[i]->ipv4_address)
+                          && opal_net_addr_isipv4public((struct sockaddr*)
+                                                        peer_interfaces[j]->ipv4_address)) {
+                    if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address,
+                                            (struct sockaddr*) peer_interfaces[j]->ipv4_address,
+                                            local_interfaces[i]->ipv4_netmask)) {
+                        weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
+                    } else {
+                        weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
+                    }
+                    best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr;
+                    continue; /* a public ipv4 pair skips the ipv6 check below */
+                } else {
+                    if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address,
+                                            (struct sockaddr*) peer_interfaces[j]->ipv4_address,
+                                            local_interfaces[i]->ipv4_netmask)) {
+                        weights[i][j] = CQ_PRIVATE_SAME_NETWORK;
+                    } else {
+                        weights[i][j] = CQ_PRIVATE_DIFFERENT_NETWORK;
+                    }
+                    best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr;
+                }
+            }
+
+            /* check state of ipv6 address pair - ipv6 is always public,
+             * since link-local addresses are skipped in opal_ifinit()
+             */
+            if(NULL != local_interfaces[i]->ipv6_address &&
+               NULL != peer_interfaces[j]->ipv6_address) {
+
+                /* check for loopback */
+                if ((opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address)
+                     && !opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv6_address))
+                    || (opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv6_address)
+                        && !opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address))
+                    || (opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address)
+                        && !opal_ifislocal(proc_hostname))) {
+
+                    /* No connection is possible on these interfaces */
+
+                } else if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv6_address,
+                                               (struct sockaddr*) peer_interfaces[j]->ipv6_address,
+                                               local_interfaces[i]->ipv6_netmask)) {
+                    weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
+                } else {
+                    weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
+                }
+                best_addr[i][j] = peer_interfaces[j]->ipv6_endpoint_addr;
+            }
+
+        } /* for each peer interface */
+    } /* for each local interface */
+
+    /*
+     * determine the size of the set to permute (max number of
+     * interfaces)
+     */
+
+    best_assignment = (unsigned int *) malloc (perm_size * sizeof(int));
+
+    a = (int *) malloc(perm_size * sizeof(int));
+    if (NULL == a) {
+        return NULL; /* NULL is the "not reachable" result of this pointer-returning API; NOTE(review): the earlier allocations are not released on this path */
+    }
+
+    /* Can only find the best set of connections when the number of
+     * interfaces is not too big. When it gets larger, we fall back
+     * to a simpler and faster (and not as optimal) algorithm.
+     * See ticket https://svn.open-mpi.org/trac/ompi/ticket/2031
+     * for more details about this issue. */
+    if (perm_size <= MAX_PERMUTATION_INTERFACES) {
+        memset(a, 0, perm_size * sizeof(int));
+        max_assignment_cardinality = -1;
+        max_assignment_weight = -1;
+        visit(0, -1, perm_size, a);
+
+        rc = OPAL_ERR_UNREACH;
+        for(i = 0; i < perm_size; ++i) {
+            if(best_assignment[i] >= num_peer_interfaces /* peer_interfaces[] holds num_peer_interfaces entries */
+               || weights[i][best_assignment[i]] == CQ_NO_CONNECTION
+               || NULL == peer_interfaces[best_assignment[i]] /* test for NULL before dereferencing */
+               || peer_interfaces[best_assignment[i]]->inuse) {
+                continue;
+            }
+            peer_interfaces[best_assignment[i]]->inuse++;
+            btl_endpoint->endpoint_addr = best_addr[i][best_assignment[i]];
+            btl_endpoint->endpoint_addr->addr_inuse++;
+            rc = OPAL_SUCCESS;
+            break;
+        }
+    } else {
+        enum connection_quality max; /* same enum as weights[][] */
+        int i_max = 0, j_max = 0;
+        /* Find the best connection that is not in use. Save away
+         * the indices of the best location. */
+        max = CQ_NO_CONNECTION;
+        for(i=0; i<num_local_interfaces; ++i) {
+            for(j=0; j<num_peer_interfaces; ++j) {
+                if (!peer_interfaces[j]->inuse) {
+                    if (weights[i][j] > max) {
+                        max = weights[i][j];
+                        i_max = i;
+                        j_max = j;
+                    }
+                }
+            }
+        }
+        /* Now see if there is some type of connection available. */
+        rc = OPAL_ERR_UNREACH;
+        if (CQ_NO_CONNECTION != max) {
+            peer_interfaces[j_max]->inuse++;
+            btl_endpoint->endpoint_addr = best_addr[i_max][j_max];
+            btl_endpoint->endpoint_addr->addr_inuse++;
+            rc = OPAL_SUCCESS;
+        }
+    }
+    /* Release the per-row tables, then both interface arrays and their address copies. */
+    for(i = 0; i < perm_size; ++i) {
+        free(weights[i]);
+        free(best_addr[i]);
+    }
+
+    for(i = 0; i < num_peer_interfaces; ++i) {
+        if(NULL != peer_interfaces[i]->ipv4_address) {
+            free(peer_interfaces[i]->ipv4_address);
+        }
+        if(NULL != peer_interfaces[i]->ipv6_address) {
+            free(peer_interfaces[i]->ipv6_address);
+        }
+        free(peer_interfaces[i]);
+    }
+    free(peer_interfaces);
+    peer_interfaces = NULL;
+    max_peer_interfaces = 0;
+
+    for(i = 0; i < num_local_interfaces; ++i) {
+        if(NULL != local_interfaces[i]->ipv4_address) {
+            free(local_interfaces[i]->ipv4_address);
+        }
+        if(NULL != local_interfaces[i]->ipv6_address) {
+            free(local_interfaces[i]->ipv6_address);
+        }
+        free(local_interfaces[i]);
+    }
+    free(local_interfaces);
+    local_interfaces = NULL;
+    max_local_interfaces = 0;
+
+    free(weights);
+    free(best_addr);
+    free(best_assignment);
+    free(a);
+    return NULL; /* NOTE(review): rc is computed above but no interface is returned yet */
+}
diff --git a/opal/mca/reachable/weighted/reachable_weighted.h b/opal/mca/reachable/weighted/reachable_weighted.h
new file mode 100644
index 0000000000..04113b1046
--- /dev/null
+++ b/opal/mca/reachable/weighted/reachable_weighted.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2014 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef MCA_REACHABLE_WEIGHTED_H
+#define MCA_REACHABLE_WEIGHTED_H
+
+#include "opal_config.h"
+
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#ifdef HAVE_SYS_UN_H
+#include <sys/un.h>
+#endif
+
+#include "opal/mca/mca.h"
+#include "opal/mca/event/event.h"
+#include "opal/util/proc.h"
+#include "opal/mca/reachable/reachable.h" /* declares opal_reachable_base_component_t / _module_t used below */
+#include "opal/mca/pmix/base/base.h"
+
+BEGIN_C_DECLS
+/* The weighted component adds no fields of its own: it only wraps the framework's base component struct. */
+typedef struct {
+    opal_reachable_base_component_t super;
+} opal_reachable_weighted_component_t;
+
+OPAL_DECLSPEC extern opal_reachable_weighted_component_t mca_reachable_weighted_component;
+
+OPAL_DECLSPEC extern const opal_reachable_base_module_t opal_reachable_weighted_module;
+
+
+END_C_DECLS
+
+#endif /* MCA_REACHABLE_WEIGHTED_H */
diff --git a/opal/mca/reachable/weighted/reachable_weighted_component.c b/opal/mca/reachable/weighted/reachable_weighted_component.c
new file mode 100644
index 0000000000..f2e1afe51a
--- /dev/null
+++ b/opal/mca/reachable/weighted/reachable_weighted_component.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2014 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * These symbols are in a file by themselves to provide nice linker
+ * semantics. Since linkers generally pull in symbols by object
+ * files, keeping these symbols as the only symbols in this file
+ * prevents utility programs such as "ompi_info" from having to import
+ * entire components just to query their version and parameters.
+ */
+
+#include "opal_config.h"
+
+#include "opal/constants.h"
+#include "opal/util/proc.h"
+#include "opal/mca/reachable/reachable.h"
+#include "reachable_weighted.h"
+
+/*
+ * Public string showing the reachable weighted component version number
+ */
+const char *opal_reachable_weighted_component_version_string =
+    "OPAL weighted reachable MCA component version " OPAL_VERSION;
+
+/*
+ * Local functions
+ */
+static int reachable_weighted_open(void);
+static int reachable_weighted_close(void);
+static int reachable_weighted_component_query(mca_base_module_t **module, int *priority);
+static int component_register(void);
+
+
+/*
+ * Instantiate the public struct with all of our public information
+ * and pointers to our public functions in it
+ */
+
+opal_reachable_weighted_component_t mca_reachable_weighted_component = {
+    {
+
+        /* First, the mca_component_t struct containing meta information
+           about the component itself */
+
+        {
+            /* Indicate that we are a reachable v2.0.0 component (which also
+               implies a specific MCA version) */
+
+            OPAL_REACHABLE_BASE_VERSION_2_0_0,
+
+            /* Component name and version */
+
+            "weighted",
+            OPAL_MAJOR_VERSION,
+            OPAL_MINOR_VERSION,
+            OPAL_RELEASE_VERSION,
+
+            /* Component open, close, query and register functions */
+
+            reachable_weighted_open,
+            reachable_weighted_close,
+            reachable_weighted_component_query,
+            component_register
+        },
+        /* Next the MCA v2.0.0 component meta data */
+        {
+            /* The component is checkpoint ready */
+            MCA_BASE_METADATA_PARAM_CHECKPOINT
+        }
+    }
+};
+/* Component open hook: nothing to set up; always returns OPAL_SUCCESS. */
+static int reachable_weighted_open(void)
+{
+    /* no component fields need constructing at present */
+
+    return OPAL_SUCCESS;
+}
+/* Component close hook: nothing to tear down; always returns OPAL_SUCCESS. */
+static int reachable_weighted_close(void)
+{
+    return OPAL_SUCCESS;
+}
+/* Registration hook: registers nothing; always returns OPAL_SUCCESS. */
+static int component_register(void)
+{
+    return OPAL_SUCCESS;
+}
+/* Query hook: reports a fixed priority of 1 and hands back the weighted module table; always returns OPAL_SUCCESS. */
+static int reachable_weighted_component_query(mca_base_module_t **module, int *priority)
+{
+    *priority = 1;
+    *module = (mca_base_module_t *)&opal_reachable_weighted_module;
+    return OPAL_SUCCESS;
+}