From 903471787604e5a1ebb21d2b2b6e5446fe420413 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Sat, 23 Jun 2018 08:03:30 -0700 Subject: [PATCH 1/2] btl/tcp: use a hash map for kernel IP interface indexes The giant size of the TCP proc struct is causing a problem in some environments (because it is allocated on the stack), and it was too big, anyway. Instead, use a hash map. That way, it starts small and can grow if it needs to. It also makes no assumptions about the values of the kernel interface indexes. Fixes #5292. Signed-off-by: Jeff Squyres --- opal/mca/btl/tcp/btl_tcp_proc.c | 37 ++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/opal/mca/btl/tcp/btl_tcp_proc.c b/opal/mca/btl/tcp/btl_tcp_proc.c index 9abba8fe66..35761069b8 100644 --- a/opal/mca/btl/tcp/btl_tcp_proc.c +++ b/opal/mca/btl/tcp/btl_tcp_proc.c @@ -51,10 +51,10 @@ static void mca_btl_tcp_proc_destruct(mca_btl_tcp_proc_t* proc); struct mca_btl_tcp_proc_data_t { mca_btl_tcp_interface_t** local_interfaces; - int local_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX]; + opal_hash_table_t local_kindex_to_index; size_t num_local_interfaces, max_local_interfaces; size_t num_peer_interfaces; - int peer_kindex_to_index[MAX_KERNEL_INTERFACE_INDEX]; + opal_hash_table_t peer_kindex_to_index; unsigned int *best_assignment; int max_assignment_weight; int max_assignment_cardinality; @@ -280,8 +280,6 @@ static mca_btl_tcp_interface_t** mca_btl_tcp_retrieve_local_interfaces(mca_btl_t if( NULL == proc_data->local_interfaces ) return NULL; - memset(proc_data->local_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX); - /* Collect up the list of included and excluded interfaces, if any */ include = opal_argv_split(mca_btl_tcp_component.tcp_if_include,','); exclude = opal_argv_split(mca_btl_tcp_component.tcp_if_exclude,','); @@ -291,7 +289,8 @@ static mca_btl_tcp_interface_t** mca_btl_tcp_retrieve_local_interfaces(mca_btl_t * the local node */ for( idx = opal_ifbegin(); idx >= 0; idx = opal_ifnext (idx) ) { - int kindex, index; + int kindex; + uint32_t index; bool skip = false; opal_ifindextoaddr (idx, (struct sockaddr*) &local_addr, sizeof (local_addr)); @@ -340,12 +339,12 @@ static mca_btl_tcp_interface_t** mca_btl_tcp_retrieve_local_interfaces(mca_btl_t } kindex = opal_ifindextokindex(idx); - index = proc_data->local_kindex_to_index[kindex]; + int rc = opal_hash_table_get_value_uint32(&proc_data->local_kindex_to_index, kindex, (void**) &index); /* create entry for this kernel index previously not seen */ - if(-1 == index) { + if (OPAL_SUCCESS != rc) { index = proc_data->num_local_interfaces++; - proc_data->local_kindex_to_index[kindex] = index; + opal_hash_table_set_value_uint32(&proc_data->local_kindex_to_index, kindex, &index); if( proc_data->num_local_interfaces == proc_data->max_local_interfaces ) { proc_data->max_local_interfaces <<= 1; @@ -359,7 +358,7 @@ static mca_btl_tcp_interface_t** mca_btl_tcp_retrieve_local_interfaces(mca_btl_t mca_btl_tcp_initialise_interface(proc_data->local_interfaces[index], kindex, index); } - local_interface = proc_data->local_interfaces[proc_data->local_kindex_to_index[kindex]]; + local_interface = proc_data->local_interfaces[index]; switch(local_addr.ss_family) { case AF_INET: /* if AF is disabled, skip it completely */ @@ -420,13 +419,18 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, mca_btl_tcp_interface_t** peer_interfaces = NULL; mca_btl_tcp_proc_data_t _proc_data, *proc_data=&_proc_data; size_t max_peer_interfaces; - memset(proc_data, 0, sizeof(mca_btl_tcp_proc_data_t)); char str_local[128], str_remote[128]; if (NULL == (proc_hostname = opal_get_proc_hostname(btl_proc->proc_opal))) { return OPAL_ERR_UNREACH; } + memset(proc_data, 0, sizeof(mca_btl_tcp_proc_data_t)); + OBJ_CONSTRUCT(&_proc_data.local_kindex_to_index, opal_hash_table_t); + opal_hash_table_init(&_proc_data.local_kindex_to_index, 8); + OBJ_CONSTRUCT(&_proc_data.peer_kindex_to_index, opal_hash_table_t); + opal_hash_table_init(&_proc_data.peer_kindex_to_index, 8); + #ifndef WORDS_BIGENDIAN /* if we are little endian and our peer is not so lucky, then we need to put all information sent to him in big endian (aka @@ -457,7 +461,6 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, goto exit; } proc_data->num_peer_interfaces = 0; - memset(proc_data->peer_kindex_to_index, -1, sizeof(int)*MAX_KERNEL_INTERFACE_INDEX); /* * identify all kernel interfaces and the associated addresses of @@ -466,17 +469,17 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, for( i = 0; i < btl_proc->proc_addr_count; i++ ) { - int index; + uint32_t index; mca_btl_tcp_addr_t* endpoint_addr = btl_proc->proc_addrs + i; mca_btl_tcp_proc_tosocks (endpoint_addr, &endpoint_addr_ss); - index = proc_data->peer_kindex_to_index[endpoint_addr->addr_ifkindex]; + rc = opal_hash_table_get_value_uint32(&proc_data->peer_kindex_to_index, endpoint_addr->addr_ifkindex, (void**) &index); - if(-1 == index) { + if (OPAL_SUCCESS != rc) { index = proc_data->num_peer_interfaces++; - proc_data->peer_kindex_to_index[endpoint_addr->addr_ifkindex] = index; + opal_hash_table_set_value_uint32(&proc_data->peer_kindex_to_index, endpoint_addr->addr_ifkindex, &index); if( proc_data->num_peer_interfaces == max_peer_interfaces ) { max_peer_interfaces <<= 1; peer_interfaces = (mca_btl_tcp_interface_t**)realloc( peer_interfaces, @@ -735,6 +738,10 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc, free(proc_data->weights); free(proc_data->best_addr); free(proc_data->best_assignment); + + OBJ_DESTRUCT(&_proc_data.local_kindex_to_index); + OBJ_DESTRUCT(&_proc_data.peer_kindex_to_index); + free(a); return rc; From 3767ce27c0e09eb0f23ef3814a3c38eda4dc927b Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Sat, 23 Jun 2018 08:04:12 -0700 Subject: [PATCH 2/2] btl/tcp: trivial whitespace clean No code/logic changes. Signed-off-by: Jeff Squyres --- opal/mca/btl/tcp/btl_tcp_proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opal/mca/btl/tcp/btl_tcp_proc.c b/opal/mca/btl/tcp/btl_tcp_proc.c index 35761069b8..2906819c75 100644 --- a/opal/mca/btl/tcp/btl_tcp_proc.c +++ b/opal/mca/btl/tcp/btl_tcp_proc.c @@ -833,7 +833,7 @@ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i]; /* We are not here to make a decision about what is good socket * and what is not. We simply check that this socket fit the endpoint - * end we prepare for the real decision function mca_btl_tcp_endpoint_accept. */ + * end we prepare for the real decision function mca_btl_tcp_endpoint_accept. */ if( btl_endpoint->endpoint_addr->addr_family != addr->sa_family) { continue; } @@ -888,7 +888,7 @@ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr } /* In this case the connection was inbound to an address exported, but was not in a CLOSED state. * mca_btl_tcp_endpoint_accept() has logic to deal with the race condition that has likely caused this - * scenario, so call it here.*/ + * scenario, so call it here.*/ if (found_match) { (void)mca_btl_tcp_endpoint_accept(match_btl_endpoint, addr, sd); OPAL_THREAD_UNLOCK(&btl_proc->proc_lock);