From b955dbd6d96ebbbf2424b14586f7a56572bd2bf7 Mon Sep 17 00:00:00 2001
From: Rolf vandeVaart <rvandevaart@nvidia.com>
Date: Fri, 13 Dec 2013 21:25:07 +0000
Subject: [PATCH] Fix various items discovered by review of ticket #3951.

This commit was SVN r29900.
---
 ompi/mca/btl/base/btl_base_mca.c          | 4 ++--
 ompi/mca/btl/openib/btl_openib_mca.c      | 6 +++---
 ompi/mca/common/cuda/common_cuda.c        | 5 +----
 ompi/mca/common/cuda/common_cuda.h        | 3 +--
 ompi/mca/mpool/grdma/mpool_grdma_module.c | 4 ++--
 ompi/mca/pml/ob1/help-mpi-pml-ob1.txt     | 4 ++--
 ompi/mca/pml/ob1/pml_ob1.c                | 2 ++
 7 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/ompi/mca/btl/base/btl_base_mca.c b/ompi/mca/btl/base/btl_base_mca.c
index 4b250d4f75..b5ad3e7bd1 100644
--- a/ompi/mca/btl/base/btl_base_mca.c
+++ b/ompi/mca/btl/base/btl_base_mca.c
@@ -84,12 +84,12 @@ int mca_btl_base_param_register(mca_base_component_t *version,
     }
     (void) mca_base_component_var_register(version, "cuda_eager_limit", "Maximum size (in bytes, including header) of \"GPU short\" messages (must be >= 1).",
                                            MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
-                                           OPAL_INFO_LVL_9,
+                                           OPAL_INFO_LVL_5,
                                            MCA_BASE_VAR_SCOPE_READONLY,
                                            &module->btl_cuda_eager_limit);
     (void) mca_base_component_var_register(version, "cuda_rdma_limit", "Size (in bytes, including header) of GPU buffer when switch to rndv protocol and pipeline.",
                                            MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
-                                           OPAL_INFO_LVL_9,
+                                           OPAL_INFO_LVL_5,
                                            MCA_BASE_VAR_SCOPE_READONLY,
                                            &module->btl_cuda_rdma_limit);
 #endif /* OPAL_CUDA_GDR_SUPPORT */
diff --git a/ompi/mca/btl/openib/btl_openib_mca.c b/ompi/mca/btl/openib/btl_openib_mca.c
index 02aa2b869a..036423e351 100644
--- a/ompi/mca/btl/openib/btl_openib_mca.c
+++ b/ompi/mca/btl/openib/btl_openib_mca.c
@@ -588,7 +588,7 @@ int btl_openib_register_mca_params(void)
                                            "Whether CUDA GPU Direct RDMA support is built into library or not",
                                            MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
                                            MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
-                                           OPAL_INFO_LVL_4,
+                                           OPAL_INFO_LVL_5,
                                            MCA_BASE_VAR_SCOPE_CONSTANT,
                                            &mca_btl_openib_component.cuda_have_gdr);
 
@@ -602,14 +602,14 @@ int btl_openib_register_mca_params(void)
                                            "Whether Infiniband driver has GPU Direct RDMA support",
                                            MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
                                            MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
-                                           OPAL_INFO_LVL_4,
+                                           OPAL_INFO_LVL_5,
                                            MCA_BASE_VAR_SCOPE_CONSTANT,
                                            &mca_btl_openib_component.driver_have_gdr);
 
     /* Default for GPU Direct RDMA is off for now */
     CHECK(reg_bool("want_cuda_gdr", NULL,
                    "Enable or disable CUDA GPU Direct RDMA support "
-                   "(true = yes; false = no)",
+                   "(true = enabled; false = disabled)",
                    false, &mca_btl_openib_component.cuda_want_gdr));
 
     if (mca_btl_openib_component.cuda_want_gdr && !mca_btl_openib_component.cuda_have_gdr) {
diff --git a/ompi/mca/common/cuda/common_cuda.c b/ompi/mca/common/cuda/common_cuda.c
index 62c14a0168..eb4c459102 100644
--- a/ompi/mca/common/cuda/common_cuda.c
+++ b/ompi/mca/common/cuda/common_cuda.c
@@ -1644,7 +1644,7 @@ int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base)
     return 0;
 }
 
-#if OPAL_CUDA_GDR_SUPPORT && OMPI_GDR_SUPPORT
+#if OPAL_CUDA_GDR_SUPPORT
 /* Check to see if the memory was freed between the time it was stored in
  * the registration cache and now.  Return true if the memory was previously
  * freed.  This is indicated by the BUFFER_ID value in the registration cache
@@ -1702,10 +1702,7 @@ void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg)
     if (CUDA_SUCCESS != res) {
         opal_show_help("help-mpi-common-cuda.txt", "cuPointerSetAttribute failed",
                        true, ompi_process_info.nodename, res, dbuf);
-        return OMPI_ERROR;
     }
-
-
 }
 #endif /* OPAL_CUDA_GDR_SUPPORT */       
 
diff --git a/ompi/mca/common/cuda/common_cuda.h b/ompi/mca/common/cuda/common_cuda.h
index ea2aca48bd..cfc8f32027 100644
--- a/ompi/mca/common/cuda/common_cuda.h
+++ b/ompi/mca/common/cuda/common_cuda.h
@@ -32,7 +32,6 @@ struct mca_mpool_common_cuda_reg_t {
 };
 typedef struct mca_mpool_common_cuda_reg_t mca_mpool_common_cuda_reg_t;
 extern bool mca_common_cuda_enabled;
-#define OMPI_GDR_SUPPORT 1
 
 OMPI_DECLSPEC int mca_common_cuda_register_mca_variables(void);
 
@@ -75,7 +74,7 @@ OMPI_DECLSPEC int mca_common_cuda_get_device(int *devicenum);
 OMPI_DECLSPEC int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2);
 OMPI_DECLSPEC int mca_common_cuda_stage_one_init(void);
 OMPI_DECLSPEC int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base);
-#if OPAL_CUDA_GDR_SUPPORT && OMPI_GDR_SUPPORT
+#if OPAL_CUDA_GDR_SUPPORT
 OMPI_DECLSPEC bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg);
 OMPI_DECLSPEC void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg);
 #endif /* OPAL_CUDA_GDR_SUPPORT */
diff --git a/ompi/mca/mpool/grdma/mpool_grdma_module.c b/ompi/mca/mpool/grdma/mpool_grdma_module.c
index 8f207634ed..437d815caf 100644
--- a/ompi/mca/mpool/grdma/mpool_grdma_module.c
+++ b/ompi/mca/mpool/grdma/mpool_grdma_module.c
@@ -476,12 +476,12 @@ static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *add
 
     mpool->rcache->rcache_find(mpool->rcache, addr, size, &reg);
     if (NULL == reg) {
-        return 0;
+        return OMPI_SUCCESS;
     }
 
     /* If not previously freed memory, just return 0 */
     if (!(mca_common_cuda_previously_freed_memory(reg))) {
-        return 0;
+        return OMPI_SUCCESS;
     }
 
     /* mpool->rcache->rcache_dump_range(mpool->rcache, 0, (size_t)-1, "Before free"); */
diff --git a/ompi/mca/pml/ob1/help-mpi-pml-ob1.txt b/ompi/mca/pml/ob1/help-mpi-pml-ob1.txt
index e7828bca6c..4278bd2059 100644
--- a/ompi/mca/pml/ob1/help-mpi-pml-ob1.txt
+++ b/ompi/mca/pml/ob1/help-mpi-pml-ob1.txt
@@ -17,7 +17,7 @@ name and its corresponding minimum value is shown below.
   BTL eager limit value:   %d (set via btl_%s_eager_limit)
   BTL eager limit minimum: %d
   MCA parameter name:      btl_%s_eager_limit 
-
+#
 [cuda_eager_limit_too_small]
 The "CUDA eager limit" MCA parameter in the %s BTL was set to a value which
 is too low for Open MPI to function properly.  Please re-run your job
@@ -29,7 +29,7 @@ name and its corresponding minimum value is shown below.
   BTL CUDA eager limit value:   %d (set via btl_%s_cuda_eager_limit)
   BTL CUDA eager limit minimum: %d
   MCA parameter name:           btl_%s_cuda_eager_limit 
-
+#
 [cuda_rdma_limit_too_small]
 The "CUDA rdma limit" MCA parameter in the %s BTL was set to a value which
 is too low for Open MPI to function properly.  Please re-run your job
diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c
index 25c69075ef..bfb975afa5 100644
--- a/ompi/mca/pml/ob1/pml_ob1.c
+++ b/ompi/mca/pml/ob1/pml_ob1.c
@@ -372,6 +372,8 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
         if (SIZE_MAX == sm->btl_module->btl_cuda_eager_limit) {
             sm->btl_module->btl_cuda_eager_limit = sizeof(mca_pml_ob1_hdr_t);
         }
+        /* A cuda_eager_limit of 0 means the value is unused.  Any non-zero
+         * value must be at least as large as the ob1 header, so check it. */
         if (0 != sm->btl_module->btl_cuda_eager_limit) {
             if (sm->btl_module->btl_cuda_eager_limit < sizeof(mca_pml_ob1_hdr_t)) {
                 opal_show_help("help-mpi-pml-ob1.txt", "cuda_eager_limit_too_small",