Fix the add_proc issue identified by Jeff: the TCP BTL now discards a
peer proc without TCP support instead of completely dropping TCP support for the entire job.
cmr=v1.8.2:reviewer=jsquyres
This commit was SVN r31753.
Этот коммит содержится в:
родитель
95e637f5ba
Коммит
f27123a20d
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -82,7 +82,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
|
||||
}
|
||||
|
||||
if(NULL == (tcp_proc = mca_btl_tcp_proc_create(ompi_proc))) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -100,6 +100,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
|
||||
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
|
||||
if(NULL == tcp_endpoint) {
|
||||
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
||||
OBJ_RELEASE(ompi_proc);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -107,6 +108,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
|
||||
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
||||
OBJ_RELEASE(ompi_proc);
|
||||
OBJ_RELEASE(tcp_endpoint);
|
||||
continue;
|
||||
}
|
||||
|
@ -1,8 +1,9 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -1042,7 +1043,7 @@ mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules,
|
||||
mca_btl_tcp_component.tcp_free_list_max,
|
||||
mca_btl_tcp_component.tcp_free_list_inc,
|
||||
NULL );
|
||||
|
||||
|
||||
ompi_free_list_init_new( &mca_btl_tcp_component.tcp_frag_max,
|
||||
sizeof (mca_btl_tcp_frag_max_t) +
|
||||
mca_btl_tcp_module.super.btl_max_send_size,
|
||||
@ -1053,7 +1054,7 @@ mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules,
|
||||
mca_btl_tcp_component.tcp_free_list_max,
|
||||
mca_btl_tcp_component.tcp_free_list_inc,
|
||||
NULL );
|
||||
|
||||
|
||||
ompi_free_list_init_new( &mca_btl_tcp_component.tcp_frag_user,
|
||||
sizeof (mca_btl_tcp_frag_user_t),
|
||||
opal_cache_line_size,
|
||||
@ -1063,7 +1064,7 @@ mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules,
|
||||
mca_btl_tcp_component.tcp_free_list_max,
|
||||
mca_btl_tcp_component.tcp_free_list_inc,
|
||||
NULL );
|
||||
|
||||
|
||||
/* create a BTL TCP module for selected interfaces */
|
||||
if(OMPI_SUCCESS != (ret = mca_btl_tcp_component_create_instances() )) {
|
||||
return 0;
|
||||
|
@ -1,8 +1,9 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2010 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -114,7 +115,7 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc)
|
||||
if(NULL == btl_proc)
|
||||
return NULL;
|
||||
btl_proc->proc_ompi = ompi_proc;
|
||||
|
||||
|
||||
/* add to hash table of all proc instance */
|
||||
opal_hash_table_set_value_uint64(&mca_btl_tcp_component.tcp_procs,
|
||||
hash, btl_proc);
|
||||
@ -126,7 +127,8 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc)
|
||||
(void**)&btl_proc->proc_addrs,
|
||||
&size );
|
||||
if(rc != OMPI_SUCCESS) {
|
||||
BTL_ERROR(("ompi_modex_recv: failed with return value=%d", rc));
|
||||
if(OPAL_ERR_NOT_FOUND != rc)
|
||||
BTL_ERROR(("ompi_modex_recv: failed with return value=%d", rc));
|
||||
OBJ_RELEASE(btl_proc);
|
||||
return NULL;
|
||||
}
|
||||
@ -145,6 +147,7 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc)
|
||||
OBJ_RELEASE(btl_proc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(NULL == mca_btl_tcp_component.tcp_local && ompi_proc == ompi_proc_local()) {
|
||||
mca_btl_tcp_component.tcp_local = btl_proc;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user