From 3290cde6306caa2475cd06253e1cc051ed248bb6 Mon Sep 17 00:00:00 2001
From: Rolf vandeVaart <rvandevaart@nvidia.com>
Date: Thu, 7 Nov 2013 19:45:56 +0000
Subject: [PATCH] Various minor changes to bring smcuda up to date with sm.

This commit was SVN r29639.
---
 ompi/mca/btl/smcuda/btl_smcuda.c           | 10 +++----
 ompi/mca/btl/smcuda/btl_smcuda_component.c | 31 ++++++++++++----------
 ompi/mca/btl/smcuda/btl_smcuda_frag.h      | 22 +++++++--------
 3 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/ompi/mca/btl/smcuda/btl_smcuda.c b/ompi/mca/btl/smcuda/btl_smcuda.c
index 1508e7937a..3092bbf236 100644
--- a/ompi/mca/btl/smcuda/btl_smcuda.c
+++ b/ompi/mca/btl/smcuda/btl_smcuda.c
@@ -111,7 +111,6 @@ mca_btl_smcuda_t mca_btl_smcuda = {
 static void mca_btl_smcuda_send_cuda_ipc_request(struct mca_btl_base_module_t* btl,
                                                  struct mca_btl_base_endpoint_t* endpoint);
 #endif /* OPAL_CUDA_SUPPORT */
-
 /*
  * calculate offset of an address from the beginning of a shared memory segment
  */
@@ -192,7 +191,7 @@ sm_segment_attach(mca_btl_smcuda_component_t *comp_ptr)
     }
     if (-1 == (fd = open(comp_ptr->sm_rndv_file_name, O_RDONLY))) {
         int err = errno;
-        opal_show_help("help-mpi-btl-sm.txt", "sys call fail", true,
+        opal_show_help("help-mpi-btl-smcuda.txt", "sys call fail", true,
                        "open(2)", strerror(err), err);
         rc = OMPI_ERR_IN_ERRNO;
         goto out;
@@ -251,7 +250,7 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl,
 
         /* If we find >0 NUMA nodes, then investigate further */
         if (i > 0) {
-            int numa, w;
+            int numa=0, w;
             unsigned n_bound=0;
             hwloc_cpuset_t avail;
             hwloc_obj_t obj;
@@ -548,7 +547,6 @@ int mca_btl_smcuda_add_procs(
         peers[proc]->ipcstate = IPC_INIT;
         peers[proc]->ipctries = 0;
 #endif /* OPAL_CUDA_SUPPORT */
-
         n_local_procs++;
 
         /* add this proc to shared memory accessibility list */
@@ -794,7 +792,6 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
     uint32_t iov_count = 1;
     size_t max_data = *size;
     int rc;
-
 #if OPAL_CUDA_SUPPORT
     if (0 != reserve) {
 #endif /* OPAL_CUDA_SUPPORT */
@@ -918,7 +915,6 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl,
     if ( mca_btl_smcuda_component.num_outstanding_frags * 2 > (int) mca_btl_smcuda_component.fifo_size ) {
         mca_btl_smcuda_component_progress();
     }
-
 #if OPAL_CUDA_SUPPORT
     /* Initiate setting up CUDA IPC support. */
     if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate) && mca_btl_smcuda_component.use_cuda_ipc) {
@@ -977,6 +973,7 @@ int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl,
         OPAL_THREAD_ADD32(&mca_btl_smcuda_component.num_outstanding_frags, +1);
         MCA_BTL_SMCUDA_FIFO_WRITE(endpoint, endpoint->my_smp_rank,
                               endpoint->peer_smp_rank, (void *) VIRTUAL2RELATIVE(frag->hdr), false, true, rc);
+        (void)rc; /* this is safe to ignore as the message is requeued till success */
         return OMPI_SUCCESS;
     }
 
@@ -1003,7 +1000,6 @@ int mca_btl_smcuda_send( struct mca_btl_base_module_t* btl,
     if ( mca_btl_smcuda_component.num_outstanding_frags * 2 > (int) mca_btl_smcuda_component.fifo_size ) {
         mca_btl_smcuda_component_progress();
     }
-
 #if OPAL_CUDA_SUPPORT
     /* Initiate setting up CUDA IPC support */
     if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate) && mca_btl_smcuda_component.use_cuda_ipc) {
diff --git a/ompi/mca/btl/smcuda/btl_smcuda_component.c b/ompi/mca/btl/smcuda/btl_smcuda_component.c
index b1d2ba492a..7e4d4082c2 100644
--- a/ompi/mca/btl/smcuda/btl_smcuda_component.c
+++ b/ompi/mca/btl/smcuda/btl_smcuda_component.c
@@ -53,11 +53,11 @@
 #include "ompi/mca/common/sm/common_sm.h"
 #include "ompi/mca/btl/base/btl_base_error.h"
 #include "ompi/mca/rte/rte.h"
+
 #if OPAL_CUDA_SUPPORT
 #include "ompi/runtime/params.h"
 #include "ompi/mca/common/cuda/common_cuda.h"
 #endif /* OPAL_CUDA_SUPPORT */
-
 #if OPAL_ENABLE_FT_CR    == 1
 #include "opal/runtime/opal_cr.h"
 #endif
@@ -117,12 +117,13 @@ mca_btl_smcuda_component_t mca_btl_smcuda_component = {
 static inline int mca_btl_smcuda_param_register_int(
     const char* param_name,
     int default_value,
+    int level,
     int *storage)
 {
     *storage = default_value;
     (void) mca_base_component_var_register (&mca_btl_smcuda_component.super.btl_version,
                                             param_name, NULL, MCA_BASE_VAR_TYPE_INT,
-                                            NULL, 0, 0, OPAL_INFO_LVL_9,
+                                            NULL, 0, 0, level,
                                             MCA_BASE_VAR_SCOPE_READONLY, storage);
     return *storage;
 }
@@ -130,12 +131,13 @@ static inline int mca_btl_smcuda_param_register_int(
 static inline unsigned int mca_btl_smcuda_param_register_uint(
     const char* param_name,
     unsigned int default_value,
+    int level,
     unsigned int *storage)
 {
     *storage = default_value;
     (void) mca_base_component_var_register (&mca_btl_smcuda_component.super.btl_version,
                                             param_name, NULL, MCA_BASE_VAR_TYPE_UNSIGNED_INT,
-                                            NULL, 0, 0, OPAL_INFO_LVL_9,
+                                            NULL, 0, 0, level,
                                             MCA_BASE_VAR_SCOPE_READONLY, storage);
     return *storage;
 }
@@ -148,20 +150,20 @@ static int mca_btl_smcuda_component_verify(void) {
 static int smcuda_register(void)
 {
     /* register SM component parameters */
-    mca_btl_smcuda_param_register_int("free_list_num", 8, &mca_btl_smcuda_component.sm_free_list_num);
-    mca_btl_smcuda_param_register_int("free_list_max", -1, &mca_btl_smcuda_component.sm_free_list_max);
-    mca_btl_smcuda_param_register_int("free_list_inc", 64, &mca_btl_smcuda_component.sm_free_list_inc);
-    mca_btl_smcuda_param_register_int("max_procs", -1, &mca_btl_smcuda_component.sm_max_procs);
+    mca_btl_smcuda_param_register_int("free_list_num", 8, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_num);
+    mca_btl_smcuda_param_register_int("free_list_max", -1, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_max);
+    mca_btl_smcuda_param_register_int("free_list_inc", 64, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_inc);
+    mca_btl_smcuda_param_register_int("max_procs", -1, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_max_procs);
     /* there is no practical use for the mpool name parameter since mpool resources differ
        between components */
     mca_btl_smcuda_component.sm_mpool_name = "sm";
-    mca_btl_smcuda_param_register_uint("fifo_size", 4096, &mca_btl_smcuda_component.fifo_size);
-    mca_btl_smcuda_param_register_int("num_fifos", 1, &mca_btl_smcuda_component.nfifos);
+    mca_btl_smcuda_param_register_uint("fifo_size", 4096, OPAL_INFO_LVL_4, &mca_btl_smcuda_component.fifo_size);
+    mca_btl_smcuda_param_register_int("num_fifos", 1, OPAL_INFO_LVL_4, &mca_btl_smcuda_component.nfifos);
 
-    mca_btl_smcuda_param_register_uint("fifo_lazy_free", 120, &mca_btl_smcuda_component.fifo_lazy_free);
+    mca_btl_smcuda_param_register_uint("fifo_lazy_free", 120, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.fifo_lazy_free);
 
     /* default number of extra procs to allow for future growth */
-    mca_btl_smcuda_param_register_int("sm_extra_procs", 0, &mca_btl_smcuda_component.sm_extra_procs);
+    mca_btl_smcuda_param_register_int("sm_extra_procs", 0, OPAL_INFO_LVL_9, &mca_btl_smcuda_component.sm_extra_procs);
 
 #if OPAL_CUDA_SUPPORT
     /* Lower priority when CUDA support is not requested */
@@ -170,9 +172,9 @@ static int smcuda_register(void)
     } else {
         mca_btl_smcuda.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW;
     }
-    mca_btl_smcuda_param_register_int("use_cuda_ipc", 1, &mca_btl_smcuda_component.use_cuda_ipc);
-    mca_btl_smcuda_param_register_int("use_cuda_ipc_same_gpu", 1, &mca_btl_smcuda_component.use_cuda_ipc_same_gpu);
-    mca_btl_smcuda_param_register_int("cuda_ipc_verbose", 0, &mca_btl_smcuda_component.cuda_ipc_verbose);
+    mca_btl_smcuda_param_register_int("use_cuda_ipc", 1, OPAL_INFO_LVL_4, &mca_btl_smcuda_component.use_cuda_ipc);
+    mca_btl_smcuda_param_register_int("use_cuda_ipc_same_gpu", 1, OPAL_INFO_LVL_4,&mca_btl_smcuda_component.use_cuda_ipc_same_gpu);
+    mca_btl_smcuda_param_register_int("cuda_ipc_verbose", 0, OPAL_INFO_LVL_4, &mca_btl_smcuda_component.cuda_ipc_verbose);
     mca_btl_smcuda_component.cuda_ipc_output = opal_output_open(NULL);
     opal_output_set_verbosity(mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component.cuda_ipc_verbose);
 #else /* OPAL_CUDA_SUPPORT */
@@ -1125,6 +1127,7 @@ int mca_btl_smcuda_component_progress(void)
                 break;
         }
     }
+    (void)rc; /* this is safe to ignore as the message is requeued till success */
 
 #if OPAL_CUDA_SUPPORT
     /* Check to see if there are any outstanding CUDA events that have
diff --git a/ompi/mca/btl/smcuda/btl_smcuda_frag.h b/ompi/mca/btl/smcuda/btl_smcuda_frag.h
index 8e430cc226..74e27fa636 100644
--- a/ompi/mca/btl/smcuda/btl_smcuda_frag.h
+++ b/ompi/mca/btl/smcuda/btl_smcuda_frag.h
@@ -84,29 +84,29 @@ OBJ_CLASS_DECLARATION(mca_btl_smcuda_frag1_t);
 OBJ_CLASS_DECLARATION(mca_btl_smcuda_frag2_t);
 OBJ_CLASS_DECLARATION(mca_btl_smcuda_user_t);
 
-#define MCA_BTL_SMCUDA_FRAG_ALLOC_EAGER(frag)                           \
+#define MCA_BTL_SMCUDA_FRAG_ALLOC_EAGER(frag)                               \
 {                                                                       \
     ompi_free_list_item_t* item;                                        \
-    OMPI_FREE_LIST_GET_MT(&mca_btl_smcuda_component.sm_frags_eager, item); \
-    frag = (mca_btl_smcuda_frag_t*)item;                                \
+    OMPI_FREE_LIST_GET_MT(&mca_btl_smcuda_component.sm_frags_eager, item);     \
+    frag = (mca_btl_smcuda_frag_t*)item;                                    \
 }
 
-#define MCA_BTL_SMCUDA_FRAG_ALLOC_MAX(frag)                             \
+#define MCA_BTL_SMCUDA_FRAG_ALLOC_MAX(frag)                                 \
 {                                                                       \
     ompi_free_list_item_t* item;                                        \
-    OMPI_FREE_LIST_GET_MT(&mca_btl_smcuda_component.sm_frags_max, item);   \
-    frag = (mca_btl_smcuda_frag_t*)item;                                \
+    OMPI_FREE_LIST_GET_MT(&mca_btl_smcuda_component.sm_frags_max, item);       \
+    frag = (mca_btl_smcuda_frag_t*)item;                                    \
 }
 
-#define MCA_BTL_SMCUDA_FRAG_ALLOC_USER(frag)                            \
-    {                                                                   \
+#define MCA_BTL_SMCUDA_FRAG_ALLOC_USER(frag)                                \
+{                                                                       \
 	ompi_free_list_item_t* item;                                        \
-	OMPI_FREE_LIST_GET_MT(&mca_btl_smcuda_component.sm_frags_user, item);  \
-	frag = (mca_btl_smcuda_frag_t*)item;                                \
+	OMPI_FREE_LIST_GET_MT(&mca_btl_smcuda_component.sm_frags_user, item);      \
+	frag = (mca_btl_smcuda_frag_t*)item;                                    \
 }
 
 
-#define MCA_BTL_SMCUDA_FRAG_RETURN(frag)                                  \
+#define MCA_BTL_SMCUDA_FRAG_RETURN(frag)                                      \
 {                                                                         \
     OMPI_FREE_LIST_RETURN_MT(frag->my_list, (ompi_free_list_item_t*)(frag)); \
 }