Fix the add_proc issue identified by Jeff: the TCP BTL now discards a
peer proc without TCP support instead of completely dropping TCP support for the entire job. cmr=v1.8.2:reviewer=jsquyres This commit was SVN r31753.
Этот коммит содержится в:
родитель
95e637f5ba
Коммит
f27123a20d
@ -3,7 +3,7 @@
|
|||||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||||
* University Research and Technology
|
* University Research and Technology
|
||||||
* Corporation. All rights reserved.
|
* Corporation. All rights reserved.
|
||||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
@ -82,7 +82,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(NULL == (tcp_proc = mca_btl_tcp_proc_create(ompi_proc))) {
|
if(NULL == (tcp_proc = mca_btl_tcp_proc_create(ompi_proc))) {
|
||||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -100,6 +100,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
|
|||||||
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
|
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
|
||||||
if(NULL == tcp_endpoint) {
|
if(NULL == tcp_endpoint) {
|
||||||
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
||||||
|
OBJ_RELEASE(ompi_proc);
|
||||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -107,6 +108,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
|
|||||||
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
|
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
|
||||||
if(rc != OMPI_SUCCESS) {
|
if(rc != OMPI_SUCCESS) {
|
||||||
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
|
||||||
|
OBJ_RELEASE(ompi_proc);
|
||||||
OBJ_RELEASE(tcp_endpoint);
|
OBJ_RELEASE(tcp_endpoint);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||||
* University Research and Technology
|
* University Research and Technology
|
||||||
* Corporation. All rights reserved.
|
* Corporation. All rights reserved.
|
||||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
@ -1042,7 +1043,7 @@ mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules,
|
|||||||
mca_btl_tcp_component.tcp_free_list_max,
|
mca_btl_tcp_component.tcp_free_list_max,
|
||||||
mca_btl_tcp_component.tcp_free_list_inc,
|
mca_btl_tcp_component.tcp_free_list_inc,
|
||||||
NULL );
|
NULL );
|
||||||
|
|
||||||
ompi_free_list_init_new( &mca_btl_tcp_component.tcp_frag_max,
|
ompi_free_list_init_new( &mca_btl_tcp_component.tcp_frag_max,
|
||||||
sizeof (mca_btl_tcp_frag_max_t) +
|
sizeof (mca_btl_tcp_frag_max_t) +
|
||||||
mca_btl_tcp_module.super.btl_max_send_size,
|
mca_btl_tcp_module.super.btl_max_send_size,
|
||||||
@ -1053,7 +1054,7 @@ mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules,
|
|||||||
mca_btl_tcp_component.tcp_free_list_max,
|
mca_btl_tcp_component.tcp_free_list_max,
|
||||||
mca_btl_tcp_component.tcp_free_list_inc,
|
mca_btl_tcp_component.tcp_free_list_inc,
|
||||||
NULL );
|
NULL );
|
||||||
|
|
||||||
ompi_free_list_init_new( &mca_btl_tcp_component.tcp_frag_user,
|
ompi_free_list_init_new( &mca_btl_tcp_component.tcp_frag_user,
|
||||||
sizeof (mca_btl_tcp_frag_user_t),
|
sizeof (mca_btl_tcp_frag_user_t),
|
||||||
opal_cache_line_size,
|
opal_cache_line_size,
|
||||||
@ -1063,7 +1064,7 @@ mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules,
|
|||||||
mca_btl_tcp_component.tcp_free_list_max,
|
mca_btl_tcp_component.tcp_free_list_max,
|
||||||
mca_btl_tcp_component.tcp_free_list_inc,
|
mca_btl_tcp_component.tcp_free_list_inc,
|
||||||
NULL );
|
NULL );
|
||||||
|
|
||||||
/* create a BTL TCP module for selected interfaces */
|
/* create a BTL TCP module for selected interfaces */
|
||||||
if(OMPI_SUCCESS != (ret = mca_btl_tcp_component_create_instances() )) {
|
if(OMPI_SUCCESS != (ret = mca_btl_tcp_component_create_instances() )) {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||||
* University Research and Technology
|
* University Research and Technology
|
||||||
* Corporation. All rights reserved.
|
* Corporation. All rights reserved.
|
||||||
* Copyright (c) 2004-2010 The University of Tennessee and The University
|
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
@ -114,7 +115,7 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc)
|
|||||||
if(NULL == btl_proc)
|
if(NULL == btl_proc)
|
||||||
return NULL;
|
return NULL;
|
||||||
btl_proc->proc_ompi = ompi_proc;
|
btl_proc->proc_ompi = ompi_proc;
|
||||||
|
|
||||||
/* add to hash table of all proc instance */
|
/* add to hash table of all proc instance */
|
||||||
opal_hash_table_set_value_uint64(&mca_btl_tcp_component.tcp_procs,
|
opal_hash_table_set_value_uint64(&mca_btl_tcp_component.tcp_procs,
|
||||||
hash, btl_proc);
|
hash, btl_proc);
|
||||||
@ -126,7 +127,8 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc)
|
|||||||
(void**)&btl_proc->proc_addrs,
|
(void**)&btl_proc->proc_addrs,
|
||||||
&size );
|
&size );
|
||||||
if(rc != OMPI_SUCCESS) {
|
if(rc != OMPI_SUCCESS) {
|
||||||
BTL_ERROR(("ompi_modex_recv: failed with return value=%d", rc));
|
if(OPAL_ERR_NOT_FOUND != rc)
|
||||||
|
BTL_ERROR(("ompi_modex_recv: failed with return value=%d", rc));
|
||||||
OBJ_RELEASE(btl_proc);
|
OBJ_RELEASE(btl_proc);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -145,6 +147,7 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc)
|
|||||||
OBJ_RELEASE(btl_proc);
|
OBJ_RELEASE(btl_proc);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(NULL == mca_btl_tcp_component.tcp_local && ompi_proc == ompi_proc_local()) {
|
if(NULL == mca_btl_tcp_component.tcp_local && ompi_proc == ompi_proc_local()) {
|
||||||
mca_btl_tcp_component.tcp_local = btl_proc;
|
mca_btl_tcp_component.tcp_local = btl_proc;
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user