1
1

Revert the MPI_Init fence operations to use volatile bool instead of thread macros.

The problem is that the waiting thread is cycling using OMPI_LAZY_WAIT_FOR_COMPLETION so it can exercise opal_progress. This probably isn't as critical for the modex step, but definitely necessary for the barrier at the end of mpi_init. The problem this creates is that the lazy macro exits as soon as "active" becomes false, and then we destruct the lock.

However, wakeup_thread sets "active" to false - and then calls the condition broadcast to wakeup any waiting threads. So there is a race condition between that broadcast and the lock destruct.

Add OPAL_ACQUIRE_OBJECT and OPAL_POST_OBJECT memory barriers to help protect against thread race conditions on some platforms

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-10-31 05:52:05 -07:00
родитель 1ae8d59404
Коммит 27f3d417ca
4 изменённых файлов: 26 добавлений и 17 удалений

Просмотреть файл

@ -95,8 +95,10 @@ extern bool ompi_enable_timing;
static void fence_cbfunc(int status, void *cbdata)
{
volatile bool *active = (volatile bool*)cbdata;
*active = false;
volatile bool *active = (volatile bool*)cbdata;
OPAL_ACQUIRE_OBJECT(active);
*active = false;
OPAL_POST_OBJECT(active);
}
int ompi_mpi_finalize(void)
@ -256,6 +258,7 @@ int ompi_mpi_finalize(void)
if (!ompi_async_mpi_finalize) {
if (NULL != opal_pmix.fence_nb) {
active = true;
OPAL_POST_OBJECT(&active);
/* Note that use of the non-blocking PMIx fence will
* allow us to lazily cycle calling
* opal_progress(), which will allow any other pending

Просмотреть файл

@ -366,8 +366,10 @@ static int ompi_register_mca_variables(void)
static void fence_release(int status, void *cbdata)
{
opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
OPAL_PMIX_WAKEUP_THREAD(lock);
volatile bool *active = (volatile bool*)cbdata;
OPAL_ACQUIRE_OBJECT(active);
*active = false;
OPAL_POST_OBJECT(active);
}
int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
@ -379,7 +381,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
ompi_errhandler_errtrk_t errtrk;
opal_list_t info;
opal_value_t *kv;
opal_pmix_lock_t lock;
volatile bool active;
bool background_fence = false;
OMPI_TIMING_INIT(32);
@ -684,16 +686,17 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* execute the fence_nb in the background to collect
* the data */
background_fence = true;
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
opal_pmix.fence_nb(NULL, true, fence_release, (void*)&lock);
active = true;
OPAL_POST_OBJECT(&active);
opal_pmix.fence_nb(NULL, true, fence_release, (void*)&active);
} else if (!opal_pmix_base_async_modex) {
/* we want to do the modex */
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
active = true;
OPAL_POST_OBJECT(&active);
opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
fence_release, (void*)&lock);
fence_release, (void*)&active);
/* cannot just wait on thread as we need to call opal_progress */
OMPI_LAZY_WAIT_FOR_COMPLETION(lock.active);
OPAL_PMIX_DESTRUCT_LOCK(&lock);
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
}
/* otherwise, we don't want to do the modex, so fall thru */
} else if (!opal_pmix_base_async_modex || opal_pmix_collect_all_data) {
@ -867,18 +870,17 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
* we have to wait here for it to complete. However, there
* is no reason to do two barriers! */
if (background_fence) {
OMPI_LAZY_WAIT_FOR_COMPLETION(lock.active);
OPAL_PMIX_DESTRUCT_LOCK(&lock);
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
} else if (!ompi_async_mpi_init) {
/* wait for everyone to reach this point - this is a hard
* barrier requirement at this time, though we hope to relax
* it at a later point */
if (NULL != opal_pmix.fence_nb) {
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
active = true;
OPAL_POST_OBJECT(&active);
opal_pmix.fence_nb(NULL, false,
fence_release, (void*)&lock);
OMPI_LAZY_WAIT_FOR_COMPLETION(lock.active);
OPAL_PMIX_DESTRUCT_LOCK(&lock);
fence_release, (void*)&active);
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
} else {
opal_pmix.fence(NULL, false);
}

Просмотреть файл

@ -82,10 +82,12 @@ extern opal_pmix_base_t opal_pmix_base;
OBJ_CONSTRUCT(&(l)->mutex, opal_mutex_t); \
pthread_cond_init(&(l)->cond, NULL); \
(l)->active = true; \
OPAL_POST_OBJECT((l)); \
} while(0)
#define OPAL_PMIX_DESTRUCT_LOCK(l) \
do { \
OPAL_ACQUIRE_OBJECT((l)); \
OBJ_DESTRUCT(&(l)->mutex); \
pthread_cond_destroy(&(l)->cond); \
} while(0)

Просмотреть файл

@ -75,10 +75,12 @@ typedef struct {
PMIX_CONSTRUCT(&(l)->mutex, pmix_mutex_t); \
pthread_cond_init(&(l)->cond, NULL); \
(l)->active = true; \
PMIX_POST_OBJECT((l)); \
} while(0)
#define PMIX_DESTRUCT_LOCK(l) \
do { \
PMIX_ACQUIRE_OBJECT((l)); \
PMIX_DESTRUCT(&(l)->mutex); \
pthread_cond_destroy(&(l)->cond); \
} while(0)