btl/tcp: add support for dynamic add_procs
This commit makes two changes to the tcp btl: (1) If a tcp proc does not exist when handling a new connection, create a new proc and use it. The current implementation uses the opal_proc_for_name() function to get the opal_proc_t, then calls add_procs on all btl modules. It may be sufficient to just call add_procs until an endpoint is created, so this may change somewhat. (2) In add_procs, add a check for an existing endpoint before creating one. Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
536aba1172
Коммит
40067f7ec4
@ -10,7 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
@ -72,6 +72,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
|
||||
struct opal_proc_t* opal_proc = procs[i];
|
||||
mca_btl_tcp_proc_t* tcp_proc;
|
||||
mca_btl_base_endpoint_t* tcp_endpoint;
|
||||
bool existing_found = false;
|
||||
|
||||
/* Do not create loopback TCP connections */
|
||||
if( my_proc == opal_proc ) {
|
||||
@ -90,28 +91,43 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
|
||||
|
||||
OPAL_THREAD_LOCK(&tcp_proc->proc_lock);
|
||||
|
||||
/* The btl_proc datastructure is shared by all TCP BTL
|
||||
* instances that are trying to reach this destination.
|
||||
* Cache the peer instance on the btl_proc.
|
||||
*/
|
||||
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
|
||||
if(NULL == tcp_endpoint) {
|
||||
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
for (int j = 0 ; j < tcp_proc->proc_endpoint_count ; ++j) {
|
||||
tcp_endpoint = tcp_proc->proc_endpoints[j];
|
||||
if (tcp_endpoint->endpoint_btl == tcp_btl) {
|
||||
existing_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
tcp_endpoint->endpoint_btl = tcp_btl;
|
||||
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
|
||||
if(rc != OPAL_SUCCESS) {
|
||||
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
||||
OBJ_RELEASE(tcp_endpoint);
|
||||
continue;
|
||||
if (!existing_found) {
|
||||
/* The btl_proc datastructure is shared by all TCP BTL
|
||||
* instances that are trying to reach this destination.
|
||||
* Cache the peer instance on the btl_proc.
|
||||
*/
|
||||
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
|
||||
if(NULL == tcp_endpoint) {
|
||||
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
tcp_endpoint->endpoint_btl = tcp_btl;
|
||||
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
|
||||
if(rc != OPAL_SUCCESS) {
|
||||
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
||||
OBJ_RELEASE(tcp_endpoint);
|
||||
continue;
|
||||
}
|
||||
|
||||
opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
|
||||
}
|
||||
|
||||
opal_bitmap_set_bit(reachable, i);
|
||||
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
||||
|
||||
if (NULL != reachable) {
|
||||
opal_bitmap_set_bit(reachable, i);
|
||||
}
|
||||
|
||||
peers[i] = tcp_endpoint;
|
||||
opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
|
||||
|
||||
/* we increase the count of MPI users of the event library
|
||||
once per peer, so that we are used until we aren't
|
||||
|
@ -14,7 +14,9 @@
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -738,6 +740,31 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const opal_process_name_t *name)
|
||||
opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs,
|
||||
*name, (void**)&proc);
|
||||
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
|
||||
if (OPAL_UNLIKELY(NULL == proc)) {
|
||||
mca_btl_base_endpoint_t *endpoint;
|
||||
opal_proc_t *opal_proc;
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("adding tcp proc for unknown peer {.jobid = 0x%x, .vpid = 0x%x}",
|
||||
name->jobid, name->vpid));
|
||||
|
||||
opal_proc = opal_proc_for_name (*name);
|
||||
if (NULL == opal_proc) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* try adding this proc to each btl; remember the proc from the first endpoint returned */
|
||||
for (int i = 0 ; i < mca_btl_tcp_component.tcp_num_btls ; ++i) {
|
||||
endpoint = NULL;
|
||||
(void) mca_btl_tcp_add_procs (&mca_btl_tcp_component.tcp_btls[i]->super, 1, &opal_proc,
|
||||
&endpoint, NULL);
|
||||
if (NULL != endpoint && NULL == proc) {
|
||||
/* get the proc and continue on (could probably just break here) */
|
||||
proc = endpoint->endpoint_proc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return proc;
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user