1
1

btl/tcp: add support for dynamic add_procs

This commit makes two changes to the tcp btl:

 - If a tcp proc does not exist when handling a new connection, create
   a new proc and use it. The current implementation uses the
   opal_proc_for_name() function to get the opal_proc_t and then calls
   add_procs on all btl modules. It may be sufficient to just call
   add_procs until an endpoint is created, so this may change somewhat.

 - In add_procs, add a check for an existing endpoint before creating
   one.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2015-05-07 12:14:32 -06:00
родитель 536aba1172
Коммит 40067f7ec4
2 изменённых файлов: 61 добавлений и 18 удалений

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* *
* $COPYRIGHT$ * $COPYRIGHT$
@ -72,6 +72,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
struct opal_proc_t* opal_proc = procs[i]; struct opal_proc_t* opal_proc = procs[i];
mca_btl_tcp_proc_t* tcp_proc; mca_btl_tcp_proc_t* tcp_proc;
mca_btl_base_endpoint_t* tcp_endpoint; mca_btl_base_endpoint_t* tcp_endpoint;
bool existing_found = false;
/* Do not create loopback TCP connections */ /* Do not create loopback TCP connections */
if( my_proc == opal_proc ) { if( my_proc == opal_proc ) {
@ -90,28 +91,43 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
OPAL_THREAD_LOCK(&tcp_proc->proc_lock); OPAL_THREAD_LOCK(&tcp_proc->proc_lock);
/* The btl_proc datastructure is shared by all TCP BTL for (int j = 0 ; j < tcp_proc->proc_endpoint_count ; ++j) {
* instances that are trying to reach this destination. tcp_endpoint = tcp_proc->proc_endpoints[j];
* Cache the peer instance on the btl_proc. if (tcp_endpoint->endpoint_btl == tcp_btl) {
*/ existing_found = true;
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t); break;
if(NULL == tcp_endpoint) { }
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
return OPAL_ERR_OUT_OF_RESOURCE;
} }
tcp_endpoint->endpoint_btl = tcp_btl; if (!existing_found) {
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint); /* The btl_proc datastructure is shared by all TCP BTL
if(rc != OPAL_SUCCESS) { * instances that are trying to reach this destination.
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock); * Cache the peer instance on the btl_proc.
OBJ_RELEASE(tcp_endpoint); */
continue; tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
if(NULL == tcp_endpoint) {
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
return OPAL_ERR_OUT_OF_RESOURCE;
}
tcp_endpoint->endpoint_btl = tcp_btl;
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
if(rc != OPAL_SUCCESS) {
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
OBJ_RELEASE(tcp_endpoint);
continue;
}
opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
} }
opal_bitmap_set_bit(reachable, i);
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock); OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
if (NULL != reachable) {
opal_bitmap_set_bit(reachable, i);
}
peers[i] = tcp_endpoint; peers[i] = tcp_endpoint;
opal_list_append(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
/* we increase the count of MPI users of the event library /* we increase the count of MPI users of the event library
once per peer, so that we are used until we aren't once per peer, so that we are used until we aren't

Просмотреть файл

@ -14,7 +14,9 @@
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014-2015 Research Organization for Information Science * Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -738,6 +740,31 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const opal_process_name_t *name)
opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs, opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs,
*name, (void**)&proc); *name, (void**)&proc);
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock); OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
if (OPAL_UNLIKELY(NULL == proc)) {
mca_btl_base_endpoint_t *endpoint;
opal_proc_t *opal_proc;
int rc;
BTL_VERBOSE(("adding tcp proc for unknown peer {.jobid = 0x%x, .vpid = 0x%x}",
name->jobid, name->vpid));
opal_proc = opal_proc_for_name (*name);
if (NULL == opal_proc) {
return NULL;
}
/* try adding this proc to each btl (the loop does not stop at the first endpoint created) */
for (int i = 0 ; i < mca_btl_tcp_component.tcp_num_btls ; ++i) {
endpoint = NULL;
(void) mca_btl_tcp_add_procs (&mca_btl_tcp_component.tcp_btls[i]->super, 1, &opal_proc,
&endpoint, NULL);
if (NULL != endpoint && NULL == proc) {
/* get the proc and continue on (could probably just break here) */
proc = endpoint->endpoint_proc;
}
}
}
return proc; return proc;
} }