From dd623cec34882126f207b152d5b9b528444cd3a9 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 11 Mar 2020 07:00:40 -0700 Subject: [PATCH] Correct fence logic in MPI_Init The fence logic in MPI_Init got messed up somehow such that we were always executing a fence, which is not desirable. The logic is supposed to be: * if async fence is requested and we are not collecting data, then do not fence at all * if async fence is requested and we are collecting data, then execute the fence in the background - wait for completion at the end of MPI_Init. * if async fence is not requested, then execute a blocking fence at that point, collecting data as directed. Note that we cannot actually do a blocking fence as we need to cycle the event library via opal_progress as the PMIx progress thread is tied to the OMPI event base. Signed-off-by: Ralph Castain --- ompi/runtime/ompi_mpi_init.c | 55 ++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 30bfccefd3..025ec4ddc5 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -679,38 +679,39 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, #endif if (!ompi_singleton) { - /* If we have a non-blocking fence: - * if we are doing an async modex, but we are collecting all - * data, then execute the non-blocking modex in the background. - * All calls to modex_recv will be cached until the background - * modex completes. If collect_all_data is false, then we skip - * the fence completely and retrieve data on-demand from the - * source node. - * - * If we do not have a non-blocking fence, then we must always - * execute the blocking fence as the system does not support - * later data retrieval. */ if (opal_pmix_base_async_modex) { - /* execute the fence_nb in the background to collect - * the data */ - background_fence = true; - active = true; - OPAL_POST_OBJECT(&active); - PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); - if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, - fence_release, - (void*)&active))) { - ret = opal_pmix_convert_status(rc); - error = "PMIx_Fence_nb() failed"; - goto error; + /* if we are doing an async modex, but we are collecting all + * data, then execute the non-blocking modex in the background. + * All calls to modex_recv will be cached until the background + * modex completes. If collect_all_data is false, then we skip + * the fence completely and retrieve data on-demand from the + * source node. + */ + if (opal_pmix_collect_all_data) { + /* execute the fence_nb in the background to collect + * the data */ + background_fence = true; + active = true; + OPAL_POST_OBJECT(&active); + PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); + if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, + fence_release, + (void*)&active))) { + ret = opal_pmix_convert_status(rc); + error = "PMIx_Fence_nb() failed"; + goto error; + } } - - } else if (!opal_pmix_base_async_modex) { - /* we want to do the modex */ + } else { + /* we want to do the modex - we block at this point, but we must + * do so in a manner that allows us to call opal_progress so our + * event library can be cycled as we have tied PMIx to that + * event base */ active = true; OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); - if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active))) { + rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active); + if( PMIX_SUCCESS != rc) { ret = opal_pmix_convert_status(rc); error = "PMIx_Fence() failed"; goto error;