The ib_procs list in the openib BTL is accessed without holding the ib_lock in some cases, which causes races when running multithreaded. This patch protects the ib_procs list with the ib_lock.
Fixes trac:2149; cmr:v1.4. This commit was SVN r22682. The following Trac tickets were found above: Ticket 2149 --> https://svn.open-mpi.org/trac/ompi/ticket/2149
Этот коммит содержится в:
родитель
6828122069
Коммит
322e73d8c4
@ -1,6 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2008-2009 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -1816,6 +1817,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
|
||||
/* JMS: optimization target -- can we send something in private
|
||||
data to find the proc directly instead of having to search
|
||||
through *all* procs? */
|
||||
OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
|
||||
for (found = false, ib_proc = (mca_btl_openib_proc_t*)
|
||||
opal_list_get_first(&mca_btl_openib_component.ib_procs);
|
||||
!found &&
|
||||
@ -1850,6 +1852,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
|
||||
}
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
|
||||
if (!found) {
|
||||
BTL_VERBOSE(("could not find match for calling endpoint!"));
|
||||
rc = OMPI_ERR_NOT_FOUND;
|
||||
|
@ -13,6 +13,7 @@
|
||||
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2008-2009 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -728,6 +729,9 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
master = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME,
|
||||
process_name) > 0 ? true : false;
|
||||
|
||||
/* Need to protect the ib_procs list */
|
||||
OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
|
||||
|
||||
for (ib_proc = (mca_btl_openib_proc_t*)
|
||||
opal_list_get_first(&mca_btl_openib_component.ib_procs);
|
||||
ib_proc != (mca_btl_openib_proc_t*)
|
||||
@ -780,6 +784,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
just ignore this connection request */
|
||||
if (found && !master &&
|
||||
MCA_BTL_IB_CLOSED != ib_endpoint->endpoint_state) {
|
||||
OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -787,6 +792,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
if (!found) {
|
||||
BTL_ERROR(("can't find suitable endpoint for this peer\n"));
|
||||
mca_btl_openib_endpoint_invoke_error(NULL);
|
||||
OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -869,4 +875,5 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
|
||||
}
|
||||
break;
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -658,6 +659,7 @@ static mca_btl_openib_endpoint_t* xoob_find_endpoint(orte_process_name_t* proces
|
||||
"jobid %d, vpid %d, sid %" PRIx64 ", lid %d",
|
||||
process_name->jobid, process_name->vpid, subnet_id, lid));
|
||||
/* find ibproc */
|
||||
OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
|
||||
for (ib_proc = (mca_btl_openib_proc_t*)
|
||||
opal_list_get_first(&mca_btl_openib_component.ib_procs);
|
||||
ib_proc != (mca_btl_openib_proc_t*)
|
||||
@ -696,6 +698,7 @@ static mca_btl_openib_endpoint_t* xoob_find_endpoint(orte_process_name_t* proces
|
||||
} else {
|
||||
BTL_ERROR(("can't find suitable endpoint for this peer\n"));
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
|
||||
return ib_endpoint;
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user