From dd623cec34882126f207b152d5b9b528444cd3a9 Mon Sep 17 00:00:00 2001
From: Ralph Castain <rhc@pmix.org>
Date: Wed, 11 Mar 2020 07:00:40 -0700
Subject: [PATCH] Correct fence logic in MPI_Init

The fence logic in MPI_Init had been broken such that we were always
executing a fence, even when none should be executed. The logic is
supposed to be:

* if an async fence is requested and we are not collecting data, then
do not fence at all

* if an async fence is requested and we are collecting data, then
execute the fence in the background and wait for its completion at the
end of MPI_Init.

* if an async fence is not requested, then block at that point until the
fence completes, collecting data as directed. Note that we cannot call
the blocking PMIx_Fence here: the PMIx progress thread is tied to the
OMPI event base, so we must use PMIx_Fence_nb and keep cycling the
event library via opal_progress while we wait.

Signed-off-by: Ralph Castain <rhc@pmix.org>
---
 ompi/runtime/ompi_mpi_init.c | 55 ++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c
index 30bfccefd3..025ec4ddc5 100644
--- a/ompi/runtime/ompi_mpi_init.c
+++ b/ompi/runtime/ompi_mpi_init.c
@@ -679,38 +679,39 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
 #endif
 
     if (!ompi_singleton) {
-        /* If we have a non-blocking fence:
-         * if we are doing an async modex, but we are collecting all
-         * data, then execute the non-blocking modex in the background.
-         * All calls to modex_recv will be cached until the background
-         * modex completes. If collect_all_data is false, then we skip
-         * the fence completely and retrieve data on-demand from the
-         * source node.
-         *
-         * If we do not have a non-blocking fence, then we must always
-         * execute the blocking fence as the system does not support
-         * later data retrieval. */
         if (opal_pmix_base_async_modex) {
-            /* execute the fence_nb in the background to collect
-             * the data */
-            background_fence = true;
-            active = true;
-            OPAL_POST_OBJECT(&active);
-            PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL);
-            if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0,
-                                                    fence_release,
-                                                    (void*)&active))) {
-                ret = opal_pmix_convert_status(rc);
-                error = "PMIx_Fence_nb() failed";
-                goto error;
+            /* if we are doing an async modex, but we are collecting all
+             * data, then execute the non-blocking modex in the background.
+             * All calls to modex_recv will be cached until the background
+             * modex completes. If collect_all_data is false, then we skip
+             * the fence completely and retrieve data on-demand from the
+             * source node.
+             */
+            if (opal_pmix_collect_all_data) {
+                /* execute the fence_nb in the background to collect
+                 * the data */
+                background_fence = true;
+                active = true;
+                OPAL_POST_OBJECT(&active);
+                PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL);
+                if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0,
+                                                        fence_release,
+                                                        (void*)&active))) {
+                    ret = opal_pmix_convert_status(rc);
+                    error = "PMIx_Fence_nb() failed";
+                    goto error;
+                }
             }
-
-        } else if (!opal_pmix_base_async_modex) {
-            /* we want to do the modex */
+        } else {
+            /* we want to do the modex - we block at this point, but we must
+             * do so in a manner that allows us to call opal_progress so our
+             * event library can be cycled as we have tied PMIx to that
+             * event base */
             active = true;
             OPAL_POST_OBJECT(&active);
             PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL);
-            if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active))) {
+            rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active);
+            if( PMIX_SUCCESS != rc) {
                 ret = opal_pmix_convert_status(rc);
                 error = "PMIx_Fence() failed";
                 goto error;