Correct fence logic in MPI_Init
The fence logic in MPI_Init got messed up somehow such that we were always executing a fence, which is not desirable. The logic is supposed to be:

* if async fence is requested and we are not collecting data, then do not fence at all
* if async fence is requested and we are collecting data, then execute the fence in the background - wait for completion at the end of MPI_Init.
* if async fence is not requested, then execute a blocking fence at that point, collecting data as directed.

Note that we cannot actually do a blocking fence as we need to cycle the event library via opal_progress as the PMIx progress thread is tied to the OMPI event base.

Signed-off-by: Ralph Castain <rhc@pmix.org>
Этот коммит содержится в:
родитель
11028d0322
Коммит
dd623cec34
@ -679,38 +679,39 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!ompi_singleton) {
|
if (!ompi_singleton) {
|
||||||
/* If we have a non-blocking fence:
|
|
||||||
* if we are doing an async modex, but we are collecting all
|
|
||||||
* data, then execute the non-blocking modex in the background.
|
|
||||||
* All calls to modex_recv will be cached until the background
|
|
||||||
* modex completes. If collect_all_data is false, then we skip
|
|
||||||
* the fence completely and retrieve data on-demand from the
|
|
||||||
* source node.
|
|
||||||
*
|
|
||||||
* If we do not have a non-blocking fence, then we must always
|
|
||||||
* execute the blocking fence as the system does not support
|
|
||||||
* later data retrieval. */
|
|
||||||
if (opal_pmix_base_async_modex) {
|
if (opal_pmix_base_async_modex) {
|
||||||
/* execute the fence_nb in the background to collect
|
/* if we are doing an async modex, but we are collecting all
|
||||||
* the data */
|
* data, then execute the non-blocking modex in the background.
|
||||||
background_fence = true;
|
* All calls to modex_recv will be cached until the background
|
||||||
active = true;
|
* modex completes. If collect_all_data is false, then we skip
|
||||||
OPAL_POST_OBJECT(&active);
|
* the fence completely and retrieve data on-demand from the
|
||||||
PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL);
|
* source node.
|
||||||
if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0,
|
*/
|
||||||
fence_release,
|
if (opal_pmix_collect_all_data) {
|
||||||
(void*)&active))) {
|
/* execute the fence_nb in the background to collect
|
||||||
ret = opal_pmix_convert_status(rc);
|
* the data */
|
||||||
error = "PMIx_Fence_nb() failed";
|
background_fence = true;
|
||||||
goto error;
|
active = true;
|
||||||
|
OPAL_POST_OBJECT(&active);
|
||||||
|
PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL);
|
||||||
|
if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0,
|
||||||
|
fence_release,
|
||||||
|
(void*)&active))) {
|
||||||
|
ret = opal_pmix_convert_status(rc);
|
||||||
|
error = "PMIx_Fence_nb() failed";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
} else if (!opal_pmix_base_async_modex) {
|
/* we want to do the modex - we block at this point, but we must
|
||||||
/* we want to do the modex */
|
* do so in a manner that allows us to call opal_progress so our
|
||||||
|
* event library can be cycled as we have tied PMIx to that
|
||||||
|
* event base */
|
||||||
active = true;
|
active = true;
|
||||||
OPAL_POST_OBJECT(&active);
|
OPAL_POST_OBJECT(&active);
|
||||||
PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL);
|
PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL);
|
||||||
if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active))) {
|
rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active);
|
||||||
|
if( PMIX_SUCCESS != rc) {
|
||||||
ret = opal_pmix_convert_status(rc);
|
ret = opal_pmix_convert_status(rc);
|
||||||
error = "PMIx_Fence() failed";
|
error = "PMIx_Fence() failed";
|
||||||
goto error;
|
goto error;
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user