1
1

Fix the add_proc issue identified by Jeff: the TCP BTL now discards a

peer proc without TCP support instead of completely dropping TCP support for the entire job.

cmr=v1.8.2:reviewer=jsquyres

This commit was SVN r31753.
Этот коммит содержится в:
George Bosilca 2014-05-14 13:47:57 +00:00
родитель 95e637f5ba
Коммит f27123a20d
3 изменённых файлов: 15 добавлений и 9 удалений

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -82,7 +82,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
}
if(NULL == (tcp_proc = mca_btl_tcp_proc_create(ompi_proc))) {
return OMPI_ERR_OUT_OF_RESOURCE;
continue;
}
/*
@ -100,6 +100,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
tcp_endpoint = OBJ_NEW(mca_btl_tcp_endpoint_t);
if(NULL == tcp_endpoint) {
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
OBJ_RELEASE(ompi_proc);
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -107,6 +108,7 @@ int mca_btl_tcp_add_procs( struct mca_btl_base_module_t* btl,
rc = mca_btl_tcp_proc_insert(tcp_proc, tcp_endpoint);
if(rc != OMPI_SUCCESS) {
OPAL_THREAD_UNLOCK(&tcp_proc->proc_lock);
OBJ_RELEASE(ompi_proc);
OBJ_RELEASE(tcp_endpoint);
continue;
}

Просмотреть файл

@ -1,8 +1,9 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -1042,7 +1043,7 @@ mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules,
mca_btl_tcp_component.tcp_free_list_max,
mca_btl_tcp_component.tcp_free_list_inc,
NULL );
ompi_free_list_init_new( &mca_btl_tcp_component.tcp_frag_max,
sizeof (mca_btl_tcp_frag_max_t) +
mca_btl_tcp_module.super.btl_max_send_size,
@ -1053,7 +1054,7 @@ mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules,
mca_btl_tcp_component.tcp_free_list_max,
mca_btl_tcp_component.tcp_free_list_inc,
NULL );
ompi_free_list_init_new( &mca_btl_tcp_component.tcp_frag_user,
sizeof (mca_btl_tcp_frag_user_t),
opal_cache_line_size,
@ -1063,7 +1064,7 @@ mca_btl_base_module_t** mca_btl_tcp_component_init(int *num_btl_modules,
mca_btl_tcp_component.tcp_free_list_max,
mca_btl_tcp_component.tcp_free_list_inc,
NULL );
/* create a BTL TCP module for selected interfaces */
if(OMPI_SUCCESS != (ret = mca_btl_tcp_component_create_instances() )) {
return 0;

Просмотреть файл

@ -1,8 +1,9 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2010 The University of Tennessee and The University
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -114,7 +115,7 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc)
if(NULL == btl_proc)
return NULL;
btl_proc->proc_ompi = ompi_proc;
/* add to hash table of all proc instance */
opal_hash_table_set_value_uint64(&mca_btl_tcp_component.tcp_procs,
hash, btl_proc);
@ -126,7 +127,8 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc)
(void**)&btl_proc->proc_addrs,
&size );
if(rc != OMPI_SUCCESS) {
BTL_ERROR(("ompi_modex_recv: failed with return value=%d", rc));
if(OPAL_ERR_NOT_FOUND != rc)
BTL_ERROR(("ompi_modex_recv: failed with return value=%d", rc));
OBJ_RELEASE(btl_proc);
return NULL;
}
@ -145,6 +147,7 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(ompi_proc_t* ompi_proc)
OBJ_RELEASE(btl_proc);
return NULL;
}
if(NULL == mca_btl_tcp_component.tcp_local && ompi_proc == ompi_proc_local()) {
mca_btl_tcp_component.tcp_local = btl_proc;
}