1
1

btl/tcp: add support for dynamic add_procs

This commit makes two changes to the tcp btl:

 - If a tcp proc does not exist when handling a new connection, create
   a new proc and use it. The current implementation uses the
   opal_proc_for_name() function to get the opal_proc_t, then calls
   add_procs on all btl modules. It may be sufficient to just call
   add_procs until an endpoint is created, so this may change somewhat.

 - In add_procs, add a check for an existing endpoint before creating
   one.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2015-05-07 12:14:32 -06:00
родитель 536aba1172
Коммит 40067f7ec4
2 изменённых файлов: 61 добавлений и 18 удалений

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
@ -72,6 +72,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
struct opal_proc_t* opal_proc = procs[i];
mca_btl_tcp_proc_t* tcp_proc;
mca_btl_base_endpoint_t* tcp_endpoint;
bool existing_found = false;
/* Do not create loopback TCP connections */
if( my_proc == opal_proc ) {
@ -90,28 +91,43 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
OPAL_THREAD_LOCK(&tcp_proc->proc_lock);
/* The btl_proc datastructure is shared by all TCP BTL
* instances that are trying to reach this destination.
* Cache the peer instance on the btl_proc.
*/
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
if(NULL == tcp_endpoint) {
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
return OPAL_ERR_OUT_OF_RESOURCE;
for (int j = 0 ; j < tcp_proc->proc_endpoint_count ; ++j) {
tcp_endpoint = tcp_proc->proc_endpoints[j];
if (tcp_endpoint->endpoint_btl == tcp_btl) {
existing_found = true;
break;
}
}
tcp_endpoint->endpoint_btl = tcp_btl;
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
if(rc != OPAL_SUCCESS) {
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
OBJ_RELEASE(tcp_endpoint);
continue;
if (!existing_found) {
/* The btl_proc datastructure is shared by all TCP BTL
* instances that are trying to reach this destination.
* Cache the peer instance on the btl_proc.
*/
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
if(NULL == tcp_endpoint) {
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
return OPAL_ERR_OUT_OF_RESOURCE;
}
tcp_endpoint->endpoint_btl = tcp_btl;
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
if(rc != OPAL_SUCCESS) {
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
OBJ_RELEASE(tcp_endpoint);
continue;
}
opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
}
opal_bitmap_set_bit(reachable, i);
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
if (NULL != reachable) {
opal_bitmap_set_bit(reachable, i);
}
peers[i] = tcp_endpoint;
opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
/* we increase the count of MPI users of the event library
once per peer, so that we are used until we aren't

Просмотреть файл

@ -14,7 +14,9 @@
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -738,6 +740,31 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const opal_process_name_t *name)
opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs,
*name, (void**)&proc);
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
if (OPAL_UNLIKELY(NULL == proc)) {
mca_btl_base_endpoint_t *endpoint;
opal_proc_t *opal_proc;
int rc;
BTL_VERBOSE(("adding tcp proc for unknown peer {.jobid = 0x%x, .vpid = 0x%x}",
name->jobid, name->vpid));
opal_proc = opal_proc_for_name (*name);
if (NULL == opal_proc) {
return NULL;
}
/* try adding this proc to each btl */
for (int i = 0 ; i < mca_btl_tcp_component.tcp_num_btls ; ++i) {
endpoint = NULL;
(void) mca_btl_tcp_add_procs (&mca_btl_tcp_component.tcp_btls[i]->super, 1, &opal_proc,
&endpoint, NULL);
if (NULL != endpoint && NULL == proc) {
/* get the proc and continue on (could probably just break here) */
proc = endpoint->endpoint_proc;
}
}
}
return proc;
}