fa082cafa9
Note: this is an early preliminary step in the movement of portions of the datatype engine to the opal layer. This commit was SVN r18198.
697 строки
24 KiB
C
697 строки
24 KiB
C
/*
|
|
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#ifdef HAVE_ARPA_INET_H
|
|
#include <arpa/inet.h>
|
|
#endif
|
|
#ifdef HAVE_NETINET_IN_H
|
|
#include <netinet/in.h>
|
|
#endif
|
|
|
|
#include "opal/class/opal_hash_table.h"
|
|
#include "ompi/mca/btl/base/btl_base_error.h"
|
|
#include "ompi/runtime/ompi_module_exchange.h"
|
|
#include "opal/util/arch.h"
|
|
#include "opal/util/if.h"
|
|
#include "opal/util/net.h"
|
|
#include "orte/mca/oob/tcp/oob_tcp_addr.h"
|
|
|
|
#include "btl_tcp.h"
|
|
#include "btl_tcp_proc.h"
|
|
|
|
static void mca_btl_tcp_proc_construct(mca_btl_tcp_proc_t* proc);
|
|
static void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* proc);
|
|
|
|
mca_btl_tcp_interface_t* local_interfaces[MAX_KERNEL_INTERFACES];
|
|
mca_btl_tcp_interface_t* peer_interfaces[MAX_KERNEL_INTERFACES];
|
|
int local_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX];
|
|
int peer_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX];
|
|
size_t num_local_interfaces;
|
|
size_t num_peer_interfaces;
|
|
unsigned int *best_assignment;
|
|
int max_assignment_weight;
|
|
int max_assignment_cardinality;
|
|
enum mca_btl_tcp_connection_quality **weights;
|
|
struct mca_btl_tcp_addr_t ***best_addr;
|
|
|
|
OBJ_CLASS_INSTANCE( mca_btl_tcp_proc_t,
|
|
opal_list_item_t,
|
|
mca_btl_tcp_proc_construct,
|
|
mca_btl_tcp_proc_destruct );
|
|
|
|
void mca_btl_tcp_proc_construct(mca_btl_tcp_proc_t* proc)
|
|
{
|
|
proc->proc_ompi = 0;
|
|
proc->proc_addrs = NULL;
|
|
proc->proc_addr_count = 0;
|
|
proc->proc_endpoints = NULL;
|
|
proc->proc_endpoint_count = 0;
|
|
OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);
|
|
}
|
|
|
|
/*
|
|
* Cleanup ib proc instance
|
|
*/
|
|
|
|
void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* proc)
|
|
{
|
|
/* remove from list of all proc instances */
|
|
OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
|
|
opal_hash_table_remove_value_uint64(&mca_btl_tcp_component.tcp_procs,
|
|
orte_util_hash_name(&proc->proc_name));
|
|
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
|
|
|
|
/* release resources */
|
|
if(NULL != proc->proc_endpoints) {
|
|
free(proc->proc_endpoints);
|
|
OBJ_DESTRUCT(&proc->proc_lock);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Create a TCP process structure. There is a one-to-one correspondence
|
|
* between a ompi_proc_t and a mca_btl_tcp_proc_t instance. We cache
|
|
* additional data (specifically the list of mca_btl_tcp_endpoint_t instances,
|
|
* and published addresses) associated w/ a given destination on this
|
|
* datastructure.
|
|
*/
|
|
|
|
mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc)
|
|
{
|
|
int rc;
|
|
size_t size;
|
|
mca_btl_tcp_proc_t* btl_proc;
|
|
uint64_t hash = orte_util_hash_name(&ompi_proc->proc_name);
|
|
|
|
OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
|
|
rc = opal_hash_table_get_value_uint64(&mca_btl_tcp_component.tcp_procs,
|
|
hash, (void**)&btl_proc);
|
|
if(OMPI_SUCCESS == rc) {
|
|
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
|
|
return btl_proc;
|
|
}
|
|
|
|
btl_proc = OBJ_NEW(mca_btl_tcp_proc_t);
|
|
if(NULL == btl_proc)
|
|
return NULL;
|
|
btl_proc->proc_ompi = ompi_proc;
|
|
btl_proc->proc_name = ompi_proc->proc_name;
|
|
|
|
/* add to hash table of all proc instance */
|
|
opal_hash_table_set_value_uint64(&mca_btl_tcp_component.tcp_procs,
|
|
hash, btl_proc);
|
|
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
|
|
|
|
/* lookup tcp parameters exported by this proc */
|
|
rc = ompi_modex_recv( &mca_btl_tcp_component.super.btl_version,
|
|
ompi_proc,
|
|
(void**)&btl_proc->proc_addrs,
|
|
&size );
|
|
if(rc != OMPI_SUCCESS) {
|
|
BTL_ERROR(("mca_base_modex_recv: failed with return value=%d", rc));
|
|
OBJ_RELEASE(btl_proc);
|
|
return NULL;
|
|
}
|
|
if(0 != (size % sizeof(mca_btl_tcp_addr_t))) {
|
|
BTL_ERROR(("mca_base_modex_recv: invalid size %d: btl-size: %d\n",
|
|
size, sizeof(mca_btl_tcp_addr_t)));
|
|
return NULL;
|
|
}
|
|
btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp_addr_t);
|
|
|
|
/* allocate space for endpoint array - one for each exported address */
|
|
btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
|
|
malloc((1 + btl_proc->proc_addr_count) *
|
|
sizeof(mca_btl_base_endpoint_t*));
|
|
if(NULL == btl_proc->proc_endpoints) {
|
|
OBJ_RELEASE(btl_proc);
|
|
return NULL;
|
|
}
|
|
if(NULL == mca_btl_tcp_component.tcp_local && ompi_proc == ompi_proc_local()) {
|
|
mca_btl_tcp_component.tcp_local = btl_proc;
|
|
}
|
|
{
|
|
/* convert the OMPI addr_family field to OS constants,
|
|
* so we can check for AF_INET (or AF_INET6) and don't have
|
|
* to deal with byte ordering anymore.
|
|
*/
|
|
unsigned int i;
|
|
for (i = 0; i < btl_proc->proc_addr_count; i++) {
|
|
if (MCA_BTL_TCP_AF_INET == btl_proc->proc_addrs[i].addr_family) {
|
|
btl_proc->proc_addrs[i].addr_family = AF_INET;
|
|
}
|
|
#if OPAL_WANT_IPV6
|
|
if (MCA_BTL_TCP_AF_INET6 == btl_proc->proc_addrs[i].addr_family) {
|
|
btl_proc->proc_addrs[i].addr_family = AF_INET6;
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
return btl_proc;
|
|
}
|
|
|
|
|
|
|
|
static void evaluate_assignment(int *a) {
|
|
size_t i;
|
|
unsigned int max_interfaces = num_local_interfaces;
|
|
int assignment_weight = 0;
|
|
int assignment_cardinality = 0;
|
|
|
|
|
|
|
|
if(max_interfaces < num_peer_interfaces) {
|
|
max_interfaces = num_peer_interfaces;
|
|
}
|
|
|
|
for(i = 0; i < max_interfaces; ++i) {
|
|
if(0 < weights[i][a[i]-1]) {
|
|
++assignment_cardinality;
|
|
assignment_weight += weights[i][a[i]-1];
|
|
}
|
|
}
|
|
|
|
/*
|
|
* check wether current solution beats all previous solutions
|
|
*/
|
|
if(assignment_cardinality > max_assignment_cardinality
|
|
|| (assignment_cardinality == max_assignment_cardinality
|
|
&& assignment_weight > max_assignment_weight)) {
|
|
|
|
for(i = 0; i < max_interfaces; ++i) {
|
|
best_assignment[i] = a[i]-1;
|
|
}
|
|
max_assignment_weight = assignment_weight;
|
|
max_assignment_cardinality = assignment_cardinality;
|
|
}
|
|
}
|
|
|
|
static void visit(int k, int level, int siz, int *a)
|
|
{
|
|
level = level+1; a[k] = level;
|
|
|
|
if (level == siz) {
|
|
evaluate_assignment(a);
|
|
} else {
|
|
int i;
|
|
for ( i = 0; i < siz; i++)
|
|
if (a[i] == 0)
|
|
visit(i, level, siz, a);
|
|
}
|
|
|
|
level = level-1; a[k] = 0;
|
|
}
|
|
|
|
|
|
static void mca_btl_tcp_initialise_interface(mca_btl_tcp_interface_t* interface,
|
|
int ifk_index, int index)
|
|
{
|
|
interface->kernel_index = ifk_index;
|
|
interface->peer_interface = -1;
|
|
interface->ipv4_address = NULL;
|
|
interface->ipv6_address = NULL;
|
|
interface->index = index;
|
|
interface->inuse = 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* Note that this routine must be called with the lock on the process
|
|
* already held. Insert a btl instance into the proc array and assign
|
|
* it an address.
|
|
*/
|
|
int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
|
|
mca_btl_base_endpoint_t* btl_endpoint )
|
|
{
|
|
size_t i, j;
|
|
struct sockaddr_storage endpoint_addr_ss, local_addr;
|
|
int idx, rc;
|
|
int *a = NULL;
|
|
unsigned int perm_size;
|
|
|
|
num_local_interfaces = 0;
|
|
num_peer_interfaces = 0;
|
|
|
|
#ifndef WORDS_BIGENDIAN
|
|
/* if we are little endian and our peer is not so lucky, then we
|
|
need to put all information sent to him in big endian (aka
|
|
Network Byte Order) and expect all information received to
|
|
be in NBO. Since big endian machines always send and receive
|
|
in NBO, we don't care so much about that case. */
|
|
if (btl_proc->proc_ompi->proc_arch & OPAL_ARCH_ISBIGENDIAN) {
|
|
btl_endpoint->endpoint_nbo = true;
|
|
}
|
|
#endif
|
|
|
|
/* insert into endpoint array */
|
|
btl_endpoint->endpoint_proc = btl_proc;
|
|
btl_proc->proc_endpoints[btl_proc->proc_endpoint_count++] = btl_endpoint;
|
|
|
|
|
|
memset(local_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX);
|
|
memset(peer_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX);
|
|
memset(local_interfaces, 0, sizeof(local_interfaces));
|
|
memset(peer_interfaces, 0, sizeof(peer_interfaces));
|
|
|
|
/*
|
|
* the following two blocks shout CODE DUPLICATION. We are aware of
|
|
* the problem
|
|
*/
|
|
|
|
/*
|
|
* identify all kernel interfaces and the associated addresses of
|
|
* the local node
|
|
*/
|
|
for (idx = opal_ifbegin(); idx >= 0; idx=opal_ifnext (idx)) {
|
|
int kindex, index;
|
|
|
|
opal_ifindextoaddr (idx, (struct sockaddr*) &local_addr, sizeof (local_addr));
|
|
|
|
kindex = opal_ifindextokindex(idx);
|
|
index = local_kindex_to_index[kindex];
|
|
|
|
/* create entry for this kernel index previously not seen */
|
|
if(-1 == index) {
|
|
index = num_local_interfaces++;
|
|
local_kindex_to_index[kindex] = index;
|
|
local_interfaces[index] = malloc(sizeof(mca_btl_tcp_interface_t));
|
|
assert(NULL != local_interfaces[index]);
|
|
mca_btl_tcp_initialise_interface(local_interfaces[index], kindex, index);
|
|
}
|
|
|
|
switch(local_addr.ss_family) {
|
|
case AF_INET:
|
|
/* if AF is disabled, skip it completely */
|
|
if (4 == mca_btl_tcp_component.tcp_disable_family) {
|
|
continue;
|
|
}
|
|
|
|
local_interfaces[local_kindex_to_index[kindex]]->ipv4_address =
|
|
malloc(sizeof(local_addr));
|
|
memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv4_address,
|
|
&local_addr, sizeof(local_addr));
|
|
opal_ifindextomask(idx,
|
|
&local_interfaces[local_kindex_to_index[kindex]]->ipv4_netmask,
|
|
sizeof(int));
|
|
break;
|
|
case AF_INET6:
|
|
/* if AF is disabled, skip it completely */
|
|
if (6 == mca_btl_tcp_component.tcp_disable_family) {
|
|
continue;
|
|
}
|
|
|
|
local_interfaces[local_kindex_to_index[kindex]]->ipv6_address
|
|
= malloc(sizeof(local_addr));
|
|
memcpy(local_interfaces[local_kindex_to_index[kindex]]->ipv6_address,
|
|
&local_addr, sizeof(local_addr));
|
|
opal_ifindextomask(idx,
|
|
&local_interfaces[local_kindex_to_index[kindex]]->ipv6_netmask,
|
|
sizeof(int));
|
|
break;
|
|
default:
|
|
opal_output(0, "unknown address family for tcp: %d\n",
|
|
local_addr.ss_family);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* identify all kernel interfaces and the associated addresses of
|
|
* the peer
|
|
*/
|
|
|
|
for( i = 0; i < btl_proc->proc_addr_count; i++ ) {
|
|
|
|
int index;
|
|
|
|
mca_btl_tcp_addr_t* endpoint_addr = btl_proc->proc_addrs + i;
|
|
|
|
mca_btl_tcp_proc_tosocks (endpoint_addr, &endpoint_addr_ss);
|
|
|
|
index = peer_kindex_to_index[endpoint_addr->addr_ifkindex];
|
|
|
|
if(-1 == index) {
|
|
index = num_peer_interfaces++;
|
|
peer_kindex_to_index[endpoint_addr->addr_ifkindex] = index;
|
|
peer_interfaces[index] = malloc(sizeof(mca_btl_tcp_interface_t));
|
|
mca_btl_tcp_initialise_interface(peer_interfaces[index],
|
|
endpoint_addr->addr_ifkindex, index);
|
|
}
|
|
|
|
/*
|
|
* in case one of the peer addresses is already in use,
|
|
* mark the complete peer interface as 'not available'
|
|
*/
|
|
if(endpoint_addr->addr_inuse) {
|
|
peer_interfaces[index]->inuse = 1;
|
|
}
|
|
|
|
switch(endpoint_addr_ss.ss_family) {
|
|
case AF_INET:
|
|
peer_interfaces[index]->ipv4_address = malloc(sizeof(endpoint_addr_ss));
|
|
peer_interfaces[index]->ipv4_endpoint_addr = endpoint_addr;
|
|
memcpy(peer_interfaces[index]->ipv4_address,
|
|
&endpoint_addr_ss, sizeof(endpoint_addr_ss));
|
|
break;
|
|
case AF_INET6:
|
|
peer_interfaces[index]->ipv6_address = malloc(sizeof(endpoint_addr_ss));
|
|
peer_interfaces[index]->ipv6_endpoint_addr = endpoint_addr;
|
|
memcpy(peer_interfaces[index]->ipv6_address,
|
|
&endpoint_addr_ss, sizeof(endpoint_addr_ss));
|
|
break;
|
|
default:
|
|
opal_output(0, "unknown address family for tcp: %d\n",
|
|
local_addr.ss_family);
|
|
/*
|
|
* return OMPI_UNREACH or some error, as this is not
|
|
* good
|
|
*/
|
|
}
|
|
}
|
|
|
|
/*
|
|
* assign weights to each possible pair of interfaces
|
|
*/
|
|
|
|
perm_size = num_local_interfaces;
|
|
if(num_peer_interfaces > perm_size) {
|
|
perm_size = num_peer_interfaces;
|
|
}
|
|
|
|
weights = (enum mca_btl_tcp_connection_quality**) malloc(perm_size
|
|
* sizeof(enum mca_btl_tcp_connection_quality*));
|
|
|
|
best_addr = (mca_btl_tcp_addr_t ***) malloc(perm_size
|
|
* sizeof(mca_btl_tcp_addr_t **));
|
|
for(i = 0; i < perm_size; ++i) {
|
|
weights[i] = (enum mca_btl_tcp_connection_quality*) malloc(perm_size *
|
|
sizeof(enum mca_btl_tcp_connection_quality));
|
|
memset(weights[i], 0, perm_size * sizeof(enum mca_btl_tcp_connection_quality));
|
|
|
|
best_addr[i] = (mca_btl_tcp_addr_t **) malloc(perm_size *
|
|
sizeof(mca_btl_tcp_addr_t *));
|
|
memset(best_addr[i], 0, perm_size * sizeof(mca_btl_tcp_addr_t *));
|
|
}
|
|
|
|
|
|
for(i=0; i<num_local_interfaces; ++i) {
|
|
for(j=0; j<num_peer_interfaces; ++j) {
|
|
|
|
/* initially, assume no connection is possible */
|
|
weights[i][j] = CQ_NO_CONNECTION;
|
|
|
|
/* check state of ipv4 address pair */
|
|
if(NULL != local_interfaces[i]->ipv4_address &&
|
|
NULL != peer_interfaces[j]->ipv4_address) {
|
|
|
|
/* check for RFC1918 */
|
|
if(opal_net_addr_isipv4public((struct sockaddr*) local_interfaces[i]->ipv4_address)
|
|
&& opal_net_addr_isipv4public((struct sockaddr*)
|
|
peer_interfaces[j]->ipv4_address)) {
|
|
if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address,
|
|
(struct sockaddr*) peer_interfaces[j]->ipv4_address,
|
|
local_interfaces[i]->ipv4_netmask)) {
|
|
weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
|
|
} else {
|
|
weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
|
|
}
|
|
best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr;
|
|
continue;
|
|
} else {
|
|
if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv4_address,
|
|
(struct sockaddr*) peer_interfaces[j]->ipv4_address,
|
|
local_interfaces[i]->ipv4_netmask)) {
|
|
weights[i][j] = CQ_PRIVATE_SAME_NETWORK;
|
|
} else {
|
|
weights[i][j] = CQ_PRIVATE_DIFFERENT_NETWORK;
|
|
}
|
|
best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr;
|
|
}
|
|
}
|
|
|
|
/* check state of ipv6 address pair - ipv6 is always public,
|
|
* since link-local addresses are skipped in opal_ifinit()
|
|
*/
|
|
if(NULL != local_interfaces[i]->ipv6_address &&
|
|
NULL != peer_interfaces[j]->ipv6_address) {
|
|
if(opal_net_samenetwork((struct sockaddr*) local_interfaces[i]->ipv6_address,
|
|
(struct sockaddr*) peer_interfaces[j]->ipv6_address,
|
|
local_interfaces[i]->ipv6_netmask)) {
|
|
weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
|
|
} else {
|
|
weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
|
|
}
|
|
best_addr[i][j] = peer_interfaces[j]->ipv6_endpoint_addr;
|
|
}
|
|
|
|
} /* for each peer interface */
|
|
} /* for each local interface */
|
|
|
|
/*
|
|
* determine the size of the set to permute (max number of
|
|
* interfaces
|
|
*/
|
|
|
|
best_assignment = malloc (perm_size * sizeof(int));
|
|
|
|
a = (int *) malloc(perm_size * sizeof(int));
|
|
if (NULL == a) {
|
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
|
|
memset(a, 0, perm_size * sizeof(int));
|
|
max_assignment_cardinality = -1;
|
|
max_assignment_weight = -1;
|
|
visit(0, -1, perm_size, a);
|
|
|
|
rc = OMPI_ERR_UNREACH;
|
|
for(i = 0; i < perm_size; ++i) {
|
|
if(best_assignment[i] > num_peer_interfaces
|
|
|| weights[i][best_assignment[i]] == CQ_NO_CONNECTION
|
|
|| peer_interfaces[best_assignment[i]]->inuse
|
|
|| NULL == peer_interfaces[best_assignment[i]]) {
|
|
continue;
|
|
}
|
|
peer_interfaces[best_assignment[i]]->inuse++;
|
|
btl_endpoint->endpoint_addr = best_addr[i][best_assignment[i]];
|
|
btl_endpoint->endpoint_addr->addr_inuse++;
|
|
rc = OMPI_SUCCESS;
|
|
break;
|
|
}
|
|
|
|
for(i = 0; i < perm_size; ++i) {
|
|
free(weights[i]);
|
|
free(best_addr[i]);
|
|
}
|
|
|
|
for(i = 0; i < num_peer_interfaces; ++i) {
|
|
if(NULL != peer_interfaces[i]->ipv4_address) {
|
|
free(peer_interfaces[i]->ipv4_address);
|
|
}
|
|
if(NULL != peer_interfaces[i]->ipv6_address) {
|
|
free(peer_interfaces[i]->ipv6_address);
|
|
}
|
|
free(peer_interfaces[i]);
|
|
}
|
|
|
|
for(i = 0; i < num_local_interfaces; ++i) {
|
|
if(NULL != local_interfaces[i]->ipv4_address) {
|
|
free(local_interfaces[i]->ipv4_address);
|
|
}
|
|
if(NULL != local_interfaces[i]->ipv6_address) {
|
|
free(local_interfaces[i]->ipv6_address);
|
|
}
|
|
free(local_interfaces[i]);
|
|
}
|
|
|
|
free(weights);
|
|
free(best_addr);
|
|
free(best_assignment);
|
|
free(a);
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
* Remove an endpoint from the proc array and indicate the address is
|
|
* no longer in use.
|
|
*/
|
|
|
|
int mca_btl_tcp_proc_remove(mca_btl_tcp_proc_t* btl_proc, mca_btl_base_endpoint_t* btl_endpoint)
|
|
{
|
|
size_t i;
|
|
OPAL_THREAD_LOCK(&btl_proc->proc_lock);
|
|
for(i=0; i<btl_proc->proc_endpoint_count; i++) {
|
|
if(btl_proc->proc_endpoints[i] == btl_endpoint) {
|
|
memmove(btl_proc->proc_endpoints+i, btl_proc->proc_endpoints+i+1,
|
|
(btl_proc->proc_endpoint_count-i-1)*sizeof(mca_btl_base_endpoint_t*));
|
|
if(--btl_proc->proc_endpoint_count == 0) {
|
|
OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
|
|
OBJ_RELEASE(btl_proc);
|
|
return OMPI_SUCCESS;
|
|
}
|
|
/* The endpoint_addr may still be NULL if this enpoint is
|
|
being removed early in the wireup sequence (e.g., if it
|
|
is unreachable by all other procs) */
|
|
if (NULL != btl_endpoint->endpoint_addr) {
|
|
btl_endpoint->endpoint_addr->addr_inuse--;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
/*
|
|
* Look for an existing TCP process instance based on the globally unique
|
|
* process identifier.
|
|
*/
|
|
mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const orte_process_name_t *name)
|
|
{
|
|
mca_btl_tcp_proc_t* proc = NULL;
|
|
OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
|
|
opal_hash_table_get_value_uint64(&mca_btl_tcp_component.tcp_procs,
|
|
orte_util_hash_name(name), (void**)&proc);
|
|
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
|
|
return proc;
|
|
}
|
|
|
|
/*
|
|
* loop through all available BTLs for one matching the source address
|
|
* of the request.
|
|
*/
|
|
bool mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr, int sd)
|
|
{
|
|
size_t i;
|
|
OPAL_THREAD_LOCK(&btl_proc->proc_lock);
|
|
for( i = 0; i < btl_proc->proc_endpoint_count; i++ ) {
|
|
mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i];
|
|
uint16_t remote_kernel_index;
|
|
bool matched = false;
|
|
uint16_t j;
|
|
|
|
/* Check all conditions before going to try to accept the connection. */
|
|
if( btl_endpoint->endpoint_addr->addr_family != addr->sa_family ) {
|
|
continue;
|
|
}
|
|
|
|
/* Check for alias addresses: aliases share the same remote
|
|
* kernel_index, so if the addresses don't match, it can still be
|
|
* an alias of one we want to accept.
|
|
*/
|
|
remote_kernel_index = btl_endpoint->endpoint_addr->addr_ifkindex;
|
|
|
|
for (j=0; j < btl_proc->proc_addr_count; j++) {
|
|
mca_btl_tcp_addr_t* exported_address = btl_proc->proc_addrs + j;
|
|
|
|
#if 0
|
|
/* This is PITA. We never know which source address an
|
|
* incoming/outgoing packet will have, so even with
|
|
* btl_tcp_if_include/exclude on the remote end, we
|
|
* might get a different source address.
|
|
*
|
|
* If this address hasn't the right kernel index, we would
|
|
* erroneously drop the connection
|
|
*/
|
|
if (remote_kernel_index != exported_address->addr_ifkindex) {
|
|
/* kernel_index doesn't match, so that's not a possible
|
|
* alias
|
|
*/
|
|
continue;
|
|
}
|
|
#endif
|
|
|
|
switch (addr->sa_family) {
|
|
case AF_INET:
|
|
/* if (aliases) addresses match, accept it */
|
|
if(!memcmp( &exported_address->addr_inet,
|
|
&(((struct sockaddr_in*)addr)->sin_addr),
|
|
sizeof(struct in_addr) ) ) {
|
|
matched = true;
|
|
}
|
|
break;
|
|
#if OPAL_WANT_IPV6
|
|
case AF_INET6:
|
|
if(IN6_ARE_ADDR_EQUAL(
|
|
&exported_address->addr_inet,
|
|
&(((struct sockaddr_in6*)addr)->sin6_addr))) {
|
|
matched = true;
|
|
}
|
|
break;
|
|
#endif
|
|
default:
|
|
;
|
|
}
|
|
}
|
|
|
|
/* do we have a match for the incoming address? Read: is there a
|
|
* btl_endpoint with an address configured for the same remote
|
|
* NIC? If not, check the next btl_endpoint
|
|
*/
|
|
if (false == matched) {
|
|
continue;
|
|
}
|
|
|
|
if(mca_btl_tcp_endpoint_accept(btl_endpoint, addr, sd)) {
|
|
OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
|
|
return true;
|
|
}
|
|
}
|
|
OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* convert internal data structure (mca_btl_tcp_addr_t) to sockaddr_storage
|
|
*
|
|
*/
|
|
bool mca_btl_tcp_proc_tosocks(mca_btl_tcp_addr_t* proc_addr,
|
|
struct sockaddr_storage* output)
|
|
{
|
|
memset(output, 0, sizeof (*output));
|
|
switch (proc_addr->addr_family) {
|
|
case AF_INET:
|
|
output->ss_family = AF_INET;
|
|
memcpy(&((struct sockaddr_in*)output)->sin_addr,
|
|
&proc_addr->addr_inet, sizeof(struct in_addr));
|
|
((struct sockaddr_in*)output)->sin_port = proc_addr->addr_port;
|
|
break;
|
|
#if OPAL_WANT_IPV6
|
|
case AF_INET6:
|
|
{
|
|
struct sockaddr_in6* inaddr = (struct sockaddr_in6*)output;
|
|
output->ss_family = AF_INET6;
|
|
memcpy(&inaddr->sin6_addr, &proc_addr->addr_inet,
|
|
sizeof (proc_addr->addr_inet));
|
|
inaddr->sin6_port = proc_addr->addr_port;
|
|
inaddr->sin6_scope_id = 0;
|
|
inaddr->sin6_flowinfo = 0;
|
|
}
|
|
break;
|
|
#endif
|
|
default:
|
|
opal_output( 0, "mca_btl_tcp_proc: unknown af_family received: %d\n",
|
|
proc_addr->addr_family );
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|