reachable: Enable weighted component / fix interface
Based on work from usNIC, the best way to use the reachability information the reachable components return is to build a connectivity graph between the two peers and run a bipartite graph solver. Rather than returning the "best" pairing, the reachability framework now returns the entire mapping, allowing a (soon to be added) graph solver to build the "optimal" connectivity pairing. Practically, this means changing the return type of the reachable() function and rewriting the weighted_reachable() function to return the full mapping. The netlink_reachable() function still always returns NULL. At the same time, fix bit-rot in the weighted component and enable builds of the component by removing the opal_ignore. Also, add IPv6 support to the weighted component to support both use cases in the TCP BTL. Signed-off-by: Brian Barrett <bbarrett@amazon.com>
Этот коммит содержится в:
родитель
8f2df42055
Коммит
3f8d294191
@ -14,4 +14,5 @@ headers += \
|
||||
|
||||
libmca_reachable_la_SOURCES += \
|
||||
base/reachable_base_frame.c \
|
||||
base/reachable_base_select.c
|
||||
base/reachable_base_select.c \
|
||||
base/reachable_base_alloc.c
|
||||
|
@ -29,6 +29,10 @@ OPAL_DECLSPEC extern mca_base_framework_t opal_reachable_base_framework;
|
||||
*/
|
||||
OPAL_DECLSPEC int opal_reachable_base_select(void);
|
||||
|
||||
OPAL_DECLSPEC opal_reachable_t * opal_reachable_allocate(unsigned int num_local,
|
||||
unsigned int num_remote);
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
66
opal/mca/reachable/base/reachable_base_alloc.c
Обычный файл
66
opal/mca/reachable/base/reachable_base_alloc.c
Обычный файл
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Amazon.com, Inc. or its affiliates.
|
||||
* All Rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "opal_config.h"
|
||||
|
||||
#include "opal/class/opal_object.h"
|
||||
|
||||
#include "opal/mca/reachable/reachable.h"
|
||||
#include "opal/mca/reachable/base/base.h"
|
||||
|
||||
|
||||
static void opal_reachable_construct(opal_reachable_t *reachable)
|
||||
{
|
||||
reachable->weights = NULL;
|
||||
}
|
||||
|
||||
|
||||
static void opal_reachable_destruct(opal_reachable_t * reachable)
|
||||
{
|
||||
if (NULL != reachable->memory) {
|
||||
free(reachable->memory);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
opal_reachable_t * opal_reachable_allocate(unsigned int num_local,
|
||||
unsigned int num_remote)
|
||||
{
|
||||
char *memory;
|
||||
unsigned int i;
|
||||
opal_reachable_t *reachable = OBJ_NEW(opal_reachable_t);
|
||||
|
||||
reachable->num_local = num_local;
|
||||
reachable->num_remote = num_remote;
|
||||
|
||||
/* allocate all the pieces of the two dimensional array in one
|
||||
malloc, rather than a bunch of little allocations */
|
||||
memory = malloc(sizeof(int*) * num_local +
|
||||
num_local * (sizeof(int) * num_remote));
|
||||
if (memory == NULL) return NULL;
|
||||
|
||||
reachable->memory = (void*)memory;
|
||||
reachable->weights = (int**)reachable->memory;
|
||||
memory += (sizeof(int*) * num_local);
|
||||
|
||||
for (i = 0; i < num_local; i++) {
|
||||
reachable->weights[i] = (int*)memory;
|
||||
memory += (sizeof(int) * num_remote);
|
||||
}
|
||||
|
||||
return reachable;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
opal_reachable_t,
|
||||
opal_object_t,
|
||||
opal_reachable_construct,
|
||||
opal_reachable_destruct
|
||||
);
|
@ -34,8 +34,8 @@ static int netlink_fini(void)
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static opal_if_t* netlink_reachable(opal_list_t *local_if,
|
||||
opal_list_t *remote_if)
|
||||
static opal_reachable_t* netlink_reachable(opal_list_t *local_if,
|
||||
opal_list_t *remote_if)
|
||||
{
|
||||
/* JMS Fill me in */
|
||||
return NULL;
|
||||
|
@ -3,6 +3,8 @@
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2017 Amazon.com, Inc. or its affiliates.
|
||||
* All Rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -15,12 +17,37 @@
|
||||
|
||||
#include "opal_config.h"
|
||||
#include "opal/types.h"
|
||||
#include "opal/class/opal_object.h"
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/if/if.h"
|
||||
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
|
||||
* Reachability matrix between endpoints of a given pair of hosts
|
||||
*
|
||||
* The output of the reachable() call is a opal_reachable_t, which
|
||||
* gives an matrix of the connectivity between local and remote
|
||||
* ethernet endpoints. Any given value in weights is the connectivity
|
||||
* between the local endpoint index (first index) and the remote
|
||||
* endpoint index (second index), and is a value between 0 and INT_MAX
|
||||
* representing a relative connectivity.
|
||||
*/
|
||||
struct opal_reachable_t {
|
||||
opal_object_t super;
|
||||
/** number of local interfaces passed to reachable() */
|
||||
int num_local;
|
||||
/** number of remote interfaces passed to reachable() */
|
||||
int num_remote;
|
||||
/** matric of connectivity weights */
|
||||
int **weights;
|
||||
/** \internal */
|
||||
void *memory;
|
||||
};
|
||||
typedef struct opal_reachable_t opal_reachable_t;
|
||||
OBJ_CLASS_DECLARATION(opal_reachable_t);
|
||||
|
||||
/* Init */
|
||||
typedef int (*opal_reachable_base_module_init_fn_t)(void);
|
||||
@ -28,20 +55,19 @@ typedef int (*opal_reachable_base_module_init_fn_t)(void);
|
||||
/* Finalize */
|
||||
typedef int (*opal_reachable_base_module_fini_fn_t)(void);
|
||||
|
||||
/* Given a list of local interfaces and a list of remote
|
||||
* interfaces, return the interface that is the "best"
|
||||
* for connecting to the remote process.
|
||||
/* Build reachability matrix between local and remote ethernet
|
||||
* interfaces
|
||||
*
|
||||
* local_if: list of local opal_if_t interfaces
|
||||
* remote_if: list of opal_if_t interfaces for the remote
|
||||
* process
|
||||
* Given a list of local interfaces and remote interfaces from a
|
||||
* single peer, build a reachability matrix between the two peers.
|
||||
* This function does not select the best pairing of local and remote
|
||||
* interfaces, but only a (comparable) reachability between any pair
|
||||
* of local/remote interfaces.
|
||||
*
|
||||
* return value: pointer to opal_if_t on local_if that is
|
||||
* the "best" option for connecting. NULL
|
||||
* indicates that the remote process cannot
|
||||
* be reached on any interface
|
||||
* @returns a reachable object containing the reachability matrix on
|
||||
* success, NULL on failure.
|
||||
*/
|
||||
typedef opal_if_t*
|
||||
typedef opal_reachable_t*
|
||||
(*opal_reachable_base_module_reachable_fn_t)(opal_list_t *local_if,
|
||||
opal_list_t *remote_if);
|
||||
|
||||
@ -65,7 +91,7 @@ typedef struct {
|
||||
/*
|
||||
* Macro for use in components that are of type reachable
|
||||
*/
|
||||
#define OPAL_REACHABLE_BASE_VERSION_2_0_0 \
|
||||
#define OPAL_REACHABLE_BASE_VERSION_2_0_0 \
|
||||
OPAL_MCA_BASE_VERSION_2_1_0("reachable", 2, 0, 0)
|
||||
|
||||
/* Global structure for accessing reachability functions */
|
||||
|
@ -5,6 +5,8 @@
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2017 Amazon.com, Inc. or its affiliates.
|
||||
* All Rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -20,30 +22,41 @@
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_MATH_H
|
||||
#include <math.h>
|
||||
#endif
|
||||
|
||||
#include "opal/mca/if/if.h"
|
||||
|
||||
#include "opal/mca/reachable/base/base.h"
|
||||
#include "reachable_weighted.h"
|
||||
#include "opal/util/net.h"
|
||||
|
||||
static int weighted_init(void);
|
||||
static int weighted_fini(void);
|
||||
static opal_if_t* weighted_reachable(opal_list_t *local_if,
|
||||
opal_list_t *remote_if);
|
||||
static opal_reachable_t* weighted_reachable(opal_list_t *local_if,
|
||||
opal_list_t *remote_if);
|
||||
|
||||
static int get_weights(opal_if_t *local_if, opal_if_t *remote_if);
|
||||
static int calculate_weight(int bandwidth_local, int bandwidth_remote,
|
||||
int connection_quality);
|
||||
|
||||
/*
|
||||
* describes the quality of a possible connection between a local and
|
||||
* a remote network interface
|
||||
* Describes the quality of a possible connection between a local and
|
||||
* a remote network interface. Highest connection quality is assigned
|
||||
* to connections between interfaces on same network. This is because
|
||||
* same network implies a single hop to destination. Public addresses
|
||||
* are preferred over private addresses. This is all guessing,
|
||||
* because we don't know actual network topology.
|
||||
*/
|
||||
enum connection_quality {
|
||||
CQ_NO_CONNECTION,
|
||||
CQ_PRIVATE_DIFFERENT_NETWORK,
|
||||
CQ_PRIVATE_SAME_NETWORK,
|
||||
CQ_PUBLIC_DIFFERENT_NETWORK,
|
||||
CQ_PUBLIC_SAME_NETWORK
|
||||
CQ_NO_CONNECTION = 0,
|
||||
CQ_PRIVATE_DIFFERENT_NETWORK = 50,
|
||||
CQ_PRIVATE_SAME_NETWORK = 80,
|
||||
CQ_PUBLIC_DIFFERENT_NETWORK = 90,
|
||||
CQ_PUBLIC_SAME_NETWORK = 100
|
||||
};
|
||||
|
||||
|
||||
const opal_reachable_base_module_t opal_reachable_weighted_module = {
|
||||
weighted_init,
|
||||
weighted_fini,
|
||||
@ -53,6 +66,7 @@ const opal_reachable_base_module_t opal_reachable_weighted_module = {
|
||||
// local variables
|
||||
static int init_cntr = 0;
|
||||
|
||||
|
||||
static int weighted_init(void)
|
||||
{
|
||||
++init_cntr;
|
||||
@ -67,207 +81,183 @@ static int weighted_fini(void)
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static opal_if_t* weighted_reachable(opal_list_t *local_if,
|
||||
opal_list_t *remote_if)
|
||||
|
||||
static opal_reachable_t* weighted_reachable(opal_list_t *local_if,
|
||||
opal_list_t *remote_if)
|
||||
{
|
||||
size_t perm_size, num_local_interfaces, num_peer_interfaces;
|
||||
enum connection_quality **weights;
|
||||
opal_reachable_t *reachable_results = NULL;
|
||||
int i, j;
|
||||
opal_if_t *local_iter, *remote_iter;
|
||||
|
||||
/*
|
||||
* assign weights to each possible pair of interfaces
|
||||
*/
|
||||
num_local_interfaces = opal_list_get_size(local_if);
|
||||
num_peer_interfaces = opal_list_get_size(remote_if);
|
||||
|
||||
perm_size = num_local_interfaces;
|
||||
if (num_peer_interfaces > perm_size) {
|
||||
perm_size = num_peer_interfaces;
|
||||
reachable_results = opal_reachable_allocate(opal_list_get_size(local_if),
|
||||
opal_list_get_size(remote_if));
|
||||
if (NULL == reachable_results) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
weights = (enum connection_quality**)malloc(perm_size * sizeof(enum connection_quality*));
|
||||
|
||||
best_addr = (mca_btl_tcp_addr_t ***) malloc(perm_size
|
||||
* sizeof(mca_btl_tcp_addr_t **));
|
||||
for(i = 0; i < perm_size; ++i) {
|
||||
weights[i] = (enum connection_quality*) malloc(perm_size * sizeof(enum connection_quality));
|
||||
memset(weights[i], 0, perm_size * sizeof(enum connection_quality));
|
||||
|
||||
best_addr[i] = (mca_btl_tcp_addr_t **) malloc(perm_size * sizeof(mca_btl_tcp_addr_t *));
|
||||
memset(best_addr[i], 0, perm_size * sizeof(mca_btl_tcp_addr_t *));
|
||||
i = 0;
|
||||
OPAL_LIST_FOREACH(local_iter, local_if, opal_if_t) {
|
||||
j = 0;
|
||||
OPAL_LIST_FOREACH(remote_iter, remote_if, opal_if_t) {
|
||||
reachable_results->weights[i][j] = get_weights(local_iter, remote_iter);
|
||||
j++;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
for(i=0; i<num_local_interfaces; ++i) {
|
||||
for(j=0; j<num_peer_interfaces; ++j) {
|
||||
|
||||
/* initially, assume no connection is possible */
|
||||
weights[i][j] = CQ_NO_CONNECTION;
|
||||
|
||||
/* check state of ipv4 address pair */
|
||||
if (NULL != local_interfaces[i]->ipv4_address &&
|
||||
NULL != peer_interfaces[j]->ipv4_address) {
|
||||
|
||||
/* check for loopback */
|
||||
if ((opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address)
|
||||
&& !opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv4_address))
|
||||
|| (opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv4_address)
|
||||
&& !opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address))
|
||||
|| (opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv4_address)
|
||||
&& !opal_ifislocal(proc_hostname))) {
|
||||
|
||||
/* No connection is possible on these interfaces */
|
||||
|
||||
/* check for RFC1918 */
|
||||
} else if(opal_net_addr_isipv4public((struct sockaddr*) local_interfaces[i]->ipv4_address)
|
||||
&& opal_net_addr_isipv4public((struct sockaddr*)
|
||||
peer_interfaces[j]->ipv4_address)) {
|
||||
if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address,
|
||||
(struct sockaddr*) peer_interfaces[j]->ipv4_address,
|
||||
local_interfaces[i]->ipv4_netmask)) {
|
||||
weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
|
||||
} else {
|
||||
weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
|
||||
}
|
||||
best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr;
|
||||
continue;
|
||||
} else {
|
||||
if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address,
|
||||
(struct sockaddr*) peer_interfaces[j]->ipv4_address,
|
||||
local_interfaces[i]->ipv4_netmask)) {
|
||||
weights[i][j] = CQ_PRIVATE_SAME_NETWORK;
|
||||
} else {
|
||||
weights[i][j] = CQ_PRIVATE_DIFFERENT_NETWORK;
|
||||
}
|
||||
best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr;
|
||||
}
|
||||
}
|
||||
|
||||
/* check state of ipv6 address pair - ipv6 is always public,
|
||||
* since link-local addresses are skipped in opal_ifinit()
|
||||
*/
|
||||
if(NULL != local_interfaces[i]->ipv6_address &&
|
||||
NULL != peer_interfaces[j]->ipv6_address) {
|
||||
|
||||
/* check for loopback */
|
||||
if ((opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address)
|
||||
&& !opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv6_address))
|
||||
|| (opal_net_islocalhost((struct sockaddr *)peer_interfaces[j]->ipv6_address)
|
||||
&& !opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address))
|
||||
|| (opal_net_islocalhost((struct sockaddr *)local_interfaces[i]->ipv6_address)
|
||||
&& !opal_ifislocal(proc_hostname))) {
|
||||
|
||||
/* No connection is possible on these interfaces */
|
||||
|
||||
} else if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv6_address,
|
||||
(struct sockaddr*) peer_interfaces[j]->ipv6_address,
|
||||
local_interfaces[i]->ipv6_netmask)) {
|
||||
weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
|
||||
} else {
|
||||
weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
|
||||
}
|
||||
best_addr[i][j] = peer_interfaces[j]->ipv6_endpoint_addr;
|
||||
}
|
||||
|
||||
} /* for each peer interface */
|
||||
} /* for each local interface */
|
||||
|
||||
/*
|
||||
* determine the size of the set to permute (max number of
|
||||
* interfaces
|
||||
*/
|
||||
|
||||
best_assignment = (unsigned int *) malloc (perm_size * sizeof(int));
|
||||
|
||||
a = (int *) malloc(perm_size * sizeof(int));
|
||||
if (NULL == a) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* Can only find the best set of connections when the number of
|
||||
* interfaces is not too big. When it gets larger, we fall back
|
||||
* to a simpler and faster (and not as optimal) algorithm.
|
||||
* See ticket https://svn.open-mpi.org/trac/ompi/ticket/2031
|
||||
* for more details about this issue. */
|
||||
if (perm_size <= MAX_PERMUTATION_INTERFACES) {
|
||||
memset(a, 0, perm_size * sizeof(int));
|
||||
max_assignment_cardinality = -1;
|
||||
max_assignment_weight = -1;
|
||||
visit(0, -1, perm_size, a);
|
||||
|
||||
rc = OPAL_ERR_UNREACH;
|
||||
for(i = 0; i < perm_size; ++i) {
|
||||
if(best_assignment[i] > num_peer_interfaces
|
||||
|| weights[i][best_assignment[i]] == CQ_NO_CONNECTION
|
||||
|| peer_interfaces[best_assignment[i]]->inuse
|
||||
|| NULL == peer_interfaces[best_assignment[i]]) {
|
||||
continue;
|
||||
}
|
||||
peer_interfaces[best_assignment[i]]->inuse++;
|
||||
btl_endpoint->endpoint_addr = best_addr[i][best_assignment[i]];
|
||||
btl_endpoint->endpoint_addr->addr_inuse++;
|
||||
rc = OPAL_SUCCESS;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
enum mca_btl_tcp_connection_quality max;
|
||||
int i_max = 0, j_max = 0;
|
||||
/* Find the best connection that is not in use. Save away
|
||||
* the indices of the best location. */
|
||||
max = CQ_NO_CONNECTION;
|
||||
for(i=0; i<num_local_interfaces; ++i) {
|
||||
for(j=0; j<num_peer_interfaces; ++j) {
|
||||
if (!peer_interfaces[j]->inuse) {
|
||||
if (weights[i][j] > max) {
|
||||
max = weights[i][j];
|
||||
i_max = i;
|
||||
j_max = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Now see if there is a some type of connection available. */
|
||||
rc = OPAL_ERR_UNREACH;
|
||||
if (CQ_NO_CONNECTION != max) {
|
||||
peer_interfaces[j_max]->inuse++;
|
||||
btl_endpoint->endpoint_addr = best_addr[i_max][j_max];
|
||||
btl_endpoint->endpoint_addr->addr_inuse++;
|
||||
rc = OPAL_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
for(i = 0; i < perm_size; ++i) {
|
||||
free(weights[i]);
|
||||
free(best_addr[i]);
|
||||
}
|
||||
|
||||
for(i = 0; i < num_peer_interfaces; ++i) {
|
||||
if(NULL != peer_interfaces[i]->ipv4_address) {
|
||||
free(peer_interfaces[i]->ipv4_address);
|
||||
}
|
||||
if(NULL != peer_interfaces[i]->ipv6_address) {
|
||||
free(peer_interfaces[i]->ipv6_address);
|
||||
}
|
||||
free(peer_interfaces[i]);
|
||||
}
|
||||
free(peer_interfaces);
|
||||
peer_interfaces = NULL;
|
||||
max_peer_interfaces = 0;
|
||||
|
||||
for(i = 0; i < num_local_interfaces; ++i) {
|
||||
if(NULL != local_interfaces[i]->ipv4_address) {
|
||||
free(local_interfaces[i]->ipv4_address);
|
||||
}
|
||||
if(NULL != local_interfaces[i]->ipv6_address) {
|
||||
free(local_interfaces[i]->ipv6_address);
|
||||
}
|
||||
free(local_interfaces[i]);
|
||||
}
|
||||
free(local_interfaces);
|
||||
local_interfaces = NULL;
|
||||
max_local_interfaces = 0;
|
||||
|
||||
free(weights);
|
||||
free(best_addr);
|
||||
free(best_assignment);
|
||||
free(a);
|
||||
return false;
|
||||
return reachable_results;
|
||||
}
|
||||
|
||||
|
||||
static int get_weights(opal_if_t *local_if, opal_if_t *remote_if)
|
||||
{
|
||||
char str_local[128], str_remote[128], *conn_type;
|
||||
struct sockaddr *local_sockaddr, *remote_sockaddr;
|
||||
int weight;
|
||||
|
||||
local_sockaddr = (struct sockaddr *)&local_if->if_addr;
|
||||
remote_sockaddr = (struct sockaddr *)&remote_if->if_addr;
|
||||
|
||||
/* opal_net_get_hostname returns a static buffer. Great for
|
||||
single address printfs, need to copy in this case */
|
||||
strncpy(str_local, opal_net_get_hostname(local_sockaddr), sizeof(str_local));
|
||||
strncpy(str_remote, opal_net_get_hostname(remote_sockaddr), sizeof(str_remote));
|
||||
|
||||
/* initially, assume no connection is possible */
|
||||
weight = calculate_weight(0, 0, CQ_NO_CONNECTION);
|
||||
|
||||
if (AF_INET == local_sockaddr->sa_family &&
|
||||
AF_INET == remote_sockaddr->sa_family) {
|
||||
|
||||
if (opal_net_addr_isipv4public(local_sockaddr) &&
|
||||
opal_net_addr_isipv4public(remote_sockaddr)) {
|
||||
if (opal_net_samenetwork(local_sockaddr,
|
||||
remote_sockaddr,
|
||||
local_if->if_mask)) {
|
||||
conn_type = "IPv4 PUBLIC SAME NETWORK";
|
||||
weight = calculate_weight(local_if->if_bandwidth,
|
||||
remote_if->if_bandwidth,
|
||||
CQ_PUBLIC_SAME_NETWORK);
|
||||
} else {
|
||||
conn_type = "IPv4 PUBLIC DIFFERENT NETWORK";
|
||||
weight = calculate_weight(local_if->if_bandwidth,
|
||||
remote_if->if_bandwidth,
|
||||
CQ_PUBLIC_DIFFERENT_NETWORK);
|
||||
}
|
||||
} else if (!opal_net_addr_isipv4public(local_sockaddr) &&
|
||||
!opal_net_addr_isipv4public(remote_sockaddr)) {
|
||||
if (opal_net_samenetwork(local_sockaddr,
|
||||
remote_sockaddr,
|
||||
local_if->if_mask)) {
|
||||
conn_type = "IPv4 PRIVATE SAME NETWORK";
|
||||
weight = calculate_weight(local_if->if_bandwidth,
|
||||
remote_if->if_bandwidth,
|
||||
CQ_PRIVATE_SAME_NETWORK);
|
||||
} else {
|
||||
conn_type = "IPv4 PRIVATE DIFFERENT NETWORK";
|
||||
weight = calculate_weight(local_if->if_bandwidth,
|
||||
remote_if->if_bandwidth,
|
||||
CQ_PRIVATE_DIFFERENT_NETWORK);
|
||||
}
|
||||
} else {
|
||||
/* one private, one public address. likely not a match. */
|
||||
conn_type = "IPv4 NO CONNECTION";
|
||||
weight = calculate_weight(local_if->if_bandwidth,
|
||||
remote_if->if_bandwidth,
|
||||
CQ_NO_CONNECTION);
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_IPV6
|
||||
} else if (AF_INET6 == local_sockaddr->sa_family &&
|
||||
AF_INET6 == remote_sockaddr->sa_family) {
|
||||
if (opal_net_addr_isipv6linklocal(local_sockaddr) &&
|
||||
opal_net_addr_isipv6linklocal(remote_sockaddr)) {
|
||||
/* we can't actually tell if link local addresses are on
|
||||
* the same network or not with the weighted component.
|
||||
* Assume they are on the same network, so that they'll be
|
||||
* most likely to be paired together, breaking the fewest
|
||||
* number of connections.
|
||||
*
|
||||
* There used to be a comment in this code (and one in the
|
||||
* BTL TCP code as well) that the opal_if code doesn't
|
||||
* pass link-local addresses through. However, this is
|
||||
* demonstratably not true on Linux, where link-local
|
||||
* interfaces are created. Since it's easy to handle
|
||||
* either case, do so.
|
||||
*/
|
||||
conn_type = "IPv6 LINK-LOCAL SAME NETWORK";
|
||||
weight = calculate_weight(local_if->if_bandwidth,
|
||||
remote_if->if_bandwidth,
|
||||
CQ_PRIVATE_SAME_NETWORK);
|
||||
} else if (!opal_net_addr_isipv6linklocal(local_sockaddr) &&
|
||||
!opal_net_addr_isipv6linklocal(remote_sockaddr)) {
|
||||
if (opal_net_samenetwork(local_sockaddr,
|
||||
remote_sockaddr,
|
||||
local_if->if_mask)) {
|
||||
conn_type = "IPv6 PUBLIC SAME NETWORK";
|
||||
weight = calculate_weight(local_if->if_bandwidth,
|
||||
remote_if->if_bandwidth,
|
||||
CQ_PUBLIC_SAME_NETWORK);
|
||||
} else {
|
||||
conn_type = "IPv6 PUBLIC DIFFERENT NETWORK";
|
||||
weight = calculate_weight(local_if->if_bandwidth,
|
||||
remote_if->if_bandwidth,
|
||||
CQ_PUBLIC_DIFFERENT_NETWORK);
|
||||
}
|
||||
} else {
|
||||
/* one link-local, one public address. likely not a match. */
|
||||
conn_type = "IPv6 NO CONNECTION";
|
||||
weight = calculate_weight(local_if->if_bandwidth,
|
||||
remote_if->if_bandwidth,
|
||||
CQ_NO_CONNECTION);
|
||||
}
|
||||
#endif /* #if OPAL_ENABLE_IPV6 */
|
||||
|
||||
} else {
|
||||
/* we don't have an address family match, so assume no
|
||||
connection */
|
||||
conn_type = "Address type mismatch";
|
||||
weight = calculate_weight(0, 0, CQ_NO_CONNECTION);
|
||||
}
|
||||
|
||||
opal_output_verbose(20, opal_reachable_base_framework.framework_output,
|
||||
"reachable:weighted: path from %s to %s: %s",
|
||||
str_local, str_remote, conn_type);
|
||||
|
||||
return weight;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Weights determined by bandwidth between
|
||||
* interfaces (limited by lower bandwidth
|
||||
* interface). A penalty is added to minimize
|
||||
* the discrepancy in bandwidth. This helps
|
||||
* prevent pairing of fast and slow interfaces
|
||||
*
|
||||
* Formula: connection_quality * (min(a,b) + 1/(1 + |a-b|))
|
||||
*
|
||||
* Examples: a b f(a,b)
|
||||
* 0 0 1
|
||||
* 0 1 0.5
|
||||
* 1 1 2
|
||||
* 1 2 1.5
|
||||
* 1 3 1.33
|
||||
* 1 10 1.1
|
||||
* 10 10 11
|
||||
* 10 14 10.2
|
||||
* 11 14 11.25
|
||||
* 11 15 11.2
|
||||
*
|
||||
* NOTE: connection_quality of 1 is assumed for examples.
|
||||
* In reality, since we're using integers, we need
|
||||
* connection_quality to be large enough
|
||||
* to capture decimals
|
||||
*/
|
||||
static int calculate_weight(int bandwidth_local, int bandwidth_remote,
|
||||
int connection_quality)
|
||||
{
|
||||
int weight = connection_quality * (MIN(bandwidth_local, bandwidth_remote) +
|
||||
1.0 / (1.0 + (double)abs(bandwidth_local - bandwidth_remote)));
|
||||
return weight;
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 Amazon.com, Inc. or its affiliates.
|
||||
* All Rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -19,6 +21,7 @@
|
||||
#include <sys/un.h>
|
||||
#endif
|
||||
|
||||
#include "opal/mca/reachable/reachable.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
@ -5,6 +5,8 @@
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2017 Amazon.com, Inc. or its affiliates.
|
||||
* All Rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -71,7 +73,7 @@ opal_reachable_weighted_component_t mca_reachable_weighted_component = {
|
||||
.mca_register_component_params = component_register,
|
||||
},
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
.base_version = {
|
||||
.base_data = {
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user