1
1

The ib_procs list in the openib BTL is accessed without holding ib_lock in some cases, which causes race conditions in multithreaded runs. This patch protects the ib_procs list with ib_lock.

fixes trac:2149 cmr:v1.4

This commit was SVN r22682.

The following Trac tickets were found above:
  Ticket 2149 --> https://svn.open-mpi.org/trac/ompi/ticket/2149
Этот коммит содержится в:
Christopher Yeoh 2010-02-23 05:19:03 +00:00
родитель 6828122069
Коммит 322e73d8c4
3 изменённых файла: 13 добавлений и 0 удалений

Просмотреть файл

@ -1,6 +1,7 @@
/*
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008-2009 Mellanox Technologies. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
*
* $COPYRIGHT$
*
@ -1816,6 +1817,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
/* JMS: optimization target -- can we send something in private
data to find the proc directly instead of having to search
through *all* procs? */
OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
for (found = false, ib_proc = (mca_btl_openib_proc_t*)
opal_list_get_first(&mca_btl_openib_component.ib_procs);
!found &&
@ -1850,6 +1852,7 @@ static int request_received(ibcm_listen_cm_id_t *cmh,
}
}
}
OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
if (!found) {
BTL_VERBOSE(("could not find match for calling endpoint!"));
rc = OMPI_ERR_NOT_FOUND;

Просмотреть файл

@ -13,6 +13,7 @@
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2008-2009 Mellanox Technologies. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
*
* $COPYRIGHT$
*
@ -728,6 +729,9 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
master = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME,
process_name) > 0 ? true : false;
/* Need to protect the ib_procs list */
OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
for (ib_proc = (mca_btl_openib_proc_t*)
opal_list_get_first(&mca_btl_openib_component.ib_procs);
ib_proc != (mca_btl_openib_proc_t*)
@ -780,6 +784,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
just ignore this connection request */
if (found && !master &&
MCA_BTL_IB_CLOSED != ib_endpoint->endpoint_state) {
OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
return;
}
}
@ -787,6 +792,7 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
if (!found) {
BTL_ERROR(("can't find suitable endpoint for this peer\n"));
mca_btl_openib_endpoint_invoke_error(NULL);
OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
return;
}
@ -869,4 +875,5 @@ static void rml_recv_cb(int status, orte_process_name_t* process_name,
}
break;
}
OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
}

Просмотреть файл

@ -1,6 +1,7 @@
/*
* Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
*
* $COPYRIGHT$
*
@ -658,6 +659,7 @@ static mca_btl_openib_endpoint_t* xoob_find_endpoint(orte_process_name_t* proces
"jobid %d, vpid %d, sid %" PRIx64 ", lid %d",
process_name->jobid, process_name->vpid, subnet_id, lid));
/* find ibproc */
OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
for (ib_proc = (mca_btl_openib_proc_t*)
opal_list_get_first(&mca_btl_openib_component.ib_procs);
ib_proc != (mca_btl_openib_proc_t*)
@ -696,6 +698,7 @@ static mca_btl_openib_endpoint_t* xoob_find_endpoint(orte_process_name_t* proces
} else {
BTL_ERROR(("can't find suitable endpoint for this peer\n"));
}
OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
return ib_endpoint;
}