1
1

Merge pull request #1592 from hjelmn/tcp_thread_fix

btl/tcp: fix add_procs race condition
Этот коммит содержится в:
Nathan Hjelm 2016-04-27 11:40:00 -06:00
родитель 01c90d4e71 03f4a854cb
Коммит 936dfe5c26

Просмотреть файл

@@ -14,7 +14,7 @@
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014-2015 Research Organization for Information Science * Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@@ -122,16 +122,16 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc)
return btl_proc; return btl_proc;
} }
do {
btl_proc = OBJ_NEW(mca_btl_tcp_proc_t); btl_proc = OBJ_NEW(mca_btl_tcp_proc_t);
if(NULL == btl_proc) if(NULL == btl_proc) {
return NULL; rc = OPAL_ERR_OUT_OF_RESOURCE;
btl_proc->proc_opal = proc; break;
OBJ_RETAIN(btl_proc->proc_opal); }
/* add to hash table of all proc instance */ btl_proc->proc_opal = proc;
opal_proc_table_set_value(&mca_btl_tcp_component.tcp_procs,
proc->proc_name, btl_proc); OBJ_RETAIN(btl_proc->proc_opal);
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
/* lookup tcp parameters exported by this proc */ /* lookup tcp parameters exported by this proc */
OPAL_MODEX_RECV(rc, &mca_btl_tcp_component.super.btl_version, OPAL_MODEX_RECV(rc, &mca_btl_tcp_component.super.btl_version,
@@ -139,14 +139,16 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc)
if(rc != OPAL_SUCCESS) { if(rc != OPAL_SUCCESS) {
if(OPAL_ERR_NOT_FOUND != rc) if(OPAL_ERR_NOT_FOUND != rc)
BTL_ERROR(("opal_modex_recv: failed with return value=%d", rc)); BTL_ERROR(("opal_modex_recv: failed with return value=%d", rc));
OBJ_RELEASE(btl_proc); break;
return NULL;
} }
if(0 != (size % sizeof(mca_btl_tcp_addr_t))) { if(0 != (size % sizeof(mca_btl_tcp_addr_t))) {
BTL_ERROR(("opal_modex_recv: invalid size %lu: btl-size: %lu\n", BTL_ERROR(("opal_modex_recv: invalid size %lu: btl-size: %lu\n",
(unsigned long) size, (unsigned long)sizeof(mca_btl_tcp_addr_t))); (unsigned long) size, (unsigned long)sizeof(mca_btl_tcp_addr_t)));
return NULL; rc = OPAL_ERROR;
break;
} }
btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp_addr_t); btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp_addr_t);
/* allocate space for endpoint array - one for each exported address */ /* allocate space for endpoint array - one for each exported address */
@@ -154,20 +156,19 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc)
malloc((1 + btl_proc->proc_addr_count) * malloc((1 + btl_proc->proc_addr_count) *
sizeof(mca_btl_base_endpoint_t*)); sizeof(mca_btl_base_endpoint_t*));
if(NULL == btl_proc->proc_endpoints) { if(NULL == btl_proc->proc_endpoints) {
OBJ_RELEASE(btl_proc); rc = OPAL_ERR_OUT_OF_RESOURCE;
return NULL; break;
} }
if(NULL == mca_btl_tcp_component.tcp_local && (proc == opal_proc_local_get())) { if(NULL == mca_btl_tcp_component.tcp_local && (proc == opal_proc_local_get())) {
mca_btl_tcp_component.tcp_local = btl_proc; mca_btl_tcp_component.tcp_local = btl_proc;
} }
{
/* convert the OPAL addr_family field to OS constants, /* convert the OPAL addr_family field to OS constants,
* so we can check for AF_INET (or AF_INET6) and don't have * so we can check for AF_INET (or AF_INET6) and don't have
* to deal with byte ordering anymore. * to deal with byte ordering anymore.
*/ */
unsigned int i; for (unsigned int i = 0; i < btl_proc->proc_addr_count; i++) {
for (i = 0; i < btl_proc->proc_addr_count; i++) {
if (MCA_BTL_TCP_AF_INET == btl_proc->proc_addrs[i].addr_family) { if (MCA_BTL_TCP_AF_INET == btl_proc->proc_addrs[i].addr_family) {
btl_proc->proc_addrs[i].addr_family = AF_INET; btl_proc->proc_addrs[i].addr_family = AF_INET;
} }
@@ -177,7 +178,21 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc)
} }
#endif #endif
} }
} while (0);
if (OPAL_SUCCESS == rc) {
/* add to hash table of all proc instance. */
opal_proc_table_set_value(&mca_btl_tcp_component.tcp_procs,
proc->proc_name, btl_proc);
} else {
if (btl_proc) {
OBJ_RELEASE(btl_proc);
btl_proc = NULL;
} }
}
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
return btl_proc; return btl_proc;
} }