The code tries to prevent itself from running for more than one communicator
simultaneously, but does so incorrectly. If the function is already running for one communicator and is called from another thread for another communicator with a lower cid, the check comm->c_contextid != ompi_comm_lowest_cid() will fail and the function will be executed for two different communicators by two threads simultaneously. As far as I can see, nothing in the algorithm prevents it from running simultaneously for different communicators, but ompi_comm_unregister_cid() assumes that it is always called for the communicator with the lowest cid, and this is not always the case. This patch removes the bogus lowest-cid check and fixes ompi_comm_unregister_cid() so that it properly removes the cid from the list. This commit was SVN r16088.
Этот коммит содержится в:
родитель
c1065d8262
Коммит
58a018c16d
@ -11,6 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2007 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
|
||||
* Copyright (c) 2007 Voltaire All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -170,15 +171,6 @@ int ompi_comm_nextcid ( ompi_communicator_t* newcomm,
|
||||
* This is the real algorithm described in the doc
|
||||
*/
|
||||
|
||||
OPAL_THREAD_LOCK(&ompi_cid_lock);
|
||||
if (comm->c_contextid != ompi_comm_lowest_cid() ) {
|
||||
/* if not lowest cid, we do not continue, but sleep and try again */
|
||||
OPAL_THREAD_UNLOCK(&ompi_cid_lock);
|
||||
continue;
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ompi_cid_lock);
|
||||
|
||||
|
||||
for (i=start; i < mca_pml.pml_max_contextid ; i++) {
|
||||
flag=ompi_pointer_array_test_and_set_item(&ompi_mpi_communicators,
|
||||
i, comm);
|
||||
@ -365,10 +357,18 @@ static int ompi_comm_register_cid (uint32_t cid )
|
||||
|
||||
static int ompi_comm_unregister_cid (uint32_t cid)
|
||||
{
|
||||
ompi_comm_reg_t *regcom=NULL;
|
||||
opal_list_item_t *item=opal_list_remove_first(&ompi_registered_comms);
|
||||
ompi_comm_reg_t *regcom;
|
||||
opal_list_item_t *item;
|
||||
|
||||
regcom = (ompi_comm_reg_t *) item;
|
||||
for (item = opal_list_get_first(&ompi_registered_comms);
|
||||
item != opal_list_get_end(&ompi_registered_comms);
|
||||
item = opal_list_get_next(item)) {
|
||||
regcom = (ompi_comm_reg_t *)item;
|
||||
if(regcom->cid == cid) {
|
||||
opal_list_remove_item(&ompi_registered_comms, item);
|
||||
break;
|
||||
}
|
||||
}
|
||||
OBJ_RELEASE(regcom);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user