cuda: convert to opal_dl interface

2015-02-19 13:59:44 -08:00 · 2015-02-19 13:59:44 -08:00 · 1995f6beba
--- a/config/opal_check_cuda.m4
+++ b/config/opal_check_cuda.m4
@ -10,7 +10,7 @@ dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 dnl                         University of Stuttgart.  All rights reserved.
 dnl Copyright (c) 2004-2005 The Regents of the University of California.
 dnl                         All rights reserved.
-dnl Copyright (c) 2006-2010 Cisco Systems, Inc.  All rights reserved.
+dnl Copyright (c) 2006-2015 Cisco Systems, Inc.  All rights reserved.
 dnl Copyright (c) 2007      Sun Microsystems, Inc.  All rights reserved.
 dnl Copyright (c) 2009      IBM Corporation.  All rights reserved.
 dnl Copyright (c) 2009      Los Alamos National Security, LLC.  All rights
@ -72,10 +72,12 @@ AS_IF([test "$with_cuda" = "no" || test "x$with_cuda" = "x"],
                            opal_cuda_incdir="$with_cuda/include"
                            AC_MSG_RESULT([found ($opal_cuda_incdir/cuda.h)])])])])])
-# We cannot have CUDA support without dlopen support.  Check for that and
+dnl We cannot have CUDA support without dlopen support.  HOWEVER, at
-# error out if the user has also set --disable-dlopen.
+dnl this point in configure, we can't know whether the DL framework
-AS_IF([test "$enable_dlopen" = "no" && test "$opal_check_cuda_happy" = "yes"],
+dnl has been configured or not yet (it likely hasn't, since CUDA is a
-    [AC_MSG_ERROR([--with-cuda cannot be used with --disable-dlopen.  Remove one of them and reconfigure.])])
+dnl common framework, and likely configured first).  So we have to
 dnl defer this check until later (see the OPAL_CHECK_CUDA_AFTER_OPAL_DL m4
 dnl macro, below).  :-(
 # If we have CUDA support, check to see if we have CUDA 4.1 support
 AS_IF([test "$opal_check_cuda_happy"="yes"],
@ -142,3 +144,21 @@ AC_DEFINE_UNQUOTED([OPAL_CUDA_GDR_SUPPORT],$CUDA_VERSION_60_OR_GREATER,
                   [Whether we have CUDA GDR support available])
 ])
 dnl
 dnl CUDA support requires DL support (it dynamically opens the CUDA
 dnl library at run time).  But we do not check for OPAL DL support
 dnl until after the initial OPAL_CHECK_CUDA is called.  So put the
 dnl CUDA+DL check in a separate macro that can be called after the DL MCA
 dnl framework checks in the top-level configure.ac.
 dnl
 AC_DEFUN([OPAL_CHECK_CUDA_AFTER_OPAL_DL],[
    # We cannot have CUDA support without OPAL DL support.  Error out
    # if the user wants CUDA but we do not have OPAL DL support.
    #
    # This test reads the OPAL_HAVE_DL_SUPPORT and opal_check_cuda_happy
    # shell variables.  When DL support is 0 and CUDA was found, two
    # warnings explaining the conflict are printed and then configure
    # aborts; in every other case this macro does nothing.
    AS_IF([test $OPAL_HAVE_DL_SUPPORT -eq 0 && \
           test "$opal_check_cuda_happy" = "yes"],
          [AC_MSG_WARN([--with-cuda was specified, but dlopen support is disabled.])
           AC_MSG_WARN([You must reconfigure Open MPI with dlopen ("dl") support.])
           AC_MSG_ERROR([Cannot continue.])])
 ])
--- a/configure.ac
+++ b/configure.ac
@ -1157,6 +1157,18 @@ m4_ifdef([project_ompi], [OMPI_REQUIRE_ENDPOINT_TAG_FINI])
 # checkpoint results
 AC_CACHE_SAVE
 ##################################
 # CUDA: part two
 ##################################
 # This is somewhat gross to have a configure check for a common MCA
 # component outside of the normal MCA checks, but this check must come
 # after the opal DL MCA checks have been done.  Someday this could perhaps
 # be done better by having some kind of "run this check at the end of
 # all other MCA checks" hook...?
 OPAL_CHECK_CUDA_AFTER_OPAL_DL
 ##################################
 # MPI Extended Interfaces
 ##################################
--- a/opal/mca/common/cuda/common_cuda.c
+++ b/opal/mca/common/cuda/common_cuda.c
@ -10,6 +10,7 @@
 * Copyright (c) 2004-2006 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2011-2015 NVIDIA Corporation.  All rights reserved.
 * Copyright (c) 2015 Cisco Systems, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
@ -33,13 +34,13 @@
 #include "opal/datatype/opal_convertor.h"
 #include "opal/datatype/opal_datatype_cuda.h"
 #include "opal/util/output.h"
 #include "opal/util/lt_interface.h"
 #include "opal/util/show_help.h"
 #include "opal/util/proc.h"
 #include "opal/mca/mpool/base/base.h"
 #include "opal/runtime/opal_params.h"
 #include "opal/mca/timer/base/base.h"
 #include "opal/mca/dl/base/base.h"
 #include "common_cuda.h"
@ -55,12 +56,15 @@
 #define OPAL_CUDA_DLSYM(libhandle, funcName)                                         \
 do {                                                                                 \
-    *(void **)(&cuFunc.funcName) = opal_lt_dlsym(libhandle, STRINGIFY(funcName));    \
+ char *err_msg;                                                                      \
-    if (NULL == cuFunc.funcName) {                                                   \
+ void *ptr;                                                                          \
 if (OPAL_SUCCESS !=                                                                 \
     opal_dl_lookup(libhandle, STRINGIFY(funcName), &ptr, &err_msg)) {               \
        opal_show_help("help-mpi-common-cuda.txt", "dlsym failed", true,             \
-                       STRINGIFY(funcName), opal_lt_dlerror());                      \
+                       STRINGIFY(funcName), err_msg);                                \
        return 1;                                                                    \
    } else {                                                                         \
        *(void **)(&cuFunc.funcName) = ptr;                                          \
        opal_output_verbose(15, mca_common_cuda_output,                              \
                            "CUDA: successful dlsym of %s",                          \
                            STRINGIFY(funcName));                                    \
@ -185,7 +189,7 @@ static int cuda_event_dtoh_most = 0;
 static int cuda_event_htod_most = 0;
 /* Handle to libcuda.so */
-opal_lt_dlhandle libcuda_handle = NULL;
+opal_dl_handle_t *libcuda_handle = NULL;
 /* Unused variable that we register at init time and unregister at fini time.
 * This is used to detect if user has done a device reset prior to MPI_Finalize.
@ -233,9 +237,7 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ;
 */
 int mca_common_cuda_stage_one_init(void)
 {
    opal_lt_dladvise advise;
    int retval, i, j;
    int advise_support = 1;
    char *cudalibs[] = {"libcuda.so.1", "libcuda.dylib", NULL};
    char *searchpaths[] = {"", "/usr/lib64", NULL};
    char **errmsgs = NULL;
@ -339,120 +341,76 @@ int mca_common_cuda_stage_one_init(void)
        return 1;
    }
-    if (0 != (retval = opal_lt_dlinit())) {
+    if (!OPAL_HAVE_DL_SUPPORT) {
-        if (OPAL_ERR_NOT_SUPPORTED == retval) {
+        opal_show_help("help-mpi-common-cuda.txt", "dlopen disabled", true);
            opal_show_help("help-mpi-common-cuda.txt", "dlopen disabled", true);
        } else {
            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
                           "opal_lt_dlinit", retval, opal_lt_dlerror());
        }
        return 1;
    }
    /* Initialize the lt_dladvise structure.  If this does not work, we can
     * proceed without the support.  Things should still work.  */
    if (0 != (retval = opal_lt_dladvise_init(&advise))) {
        if (OPAL_ERR_NOT_SUPPORTED == retval) {
            advise_support = 0;
        } else {
            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
                           "opal_lt_dladvise_init", retval, opal_lt_dlerror());
            return 1;
        }
    }
    /* Now walk through all the potential names libcuda and find one
     * that works.  If it does, all is good.  If not, print out all
     * the messages about why things failed.  This code was careful
     * to try and save away all error messages if the loading ultimately
-     * failed to help with debugging.  
+     * failed to help with debugging.
     *
     * NOTE: On the first loop we just utilize the default loading
     * paths from the system.  For the second loop, set /usr/lib64 to
     * the search path and try again.  This is done to handle the case
-     * where we have both 32 and 64 bit libcuda.so libraries installed.
+     * where we have both 32 and 64 bit libcuda.so libraries
-     * Even when running in 64-bit mode, the /usr/lib directory
+     * installed.  Even when running in 64-bit mode, the /usr/lib
-     * is searched first and we may find a 32-bit libcuda.so.1 library.
+     * directory is searched first and we may find a 32-bit
-     * Loading of this library will fail as libtool does not handle having
+     * libcuda.so.1 library.  Loading of this library will fail as the
-     * the wrong ABI in the search path (unlike ld or ld.so).  Note that
+     * OPAL DL framework does not handle having the wrong ABI in the
-     * we only set this search path after the original search.  This is
+     * search path (unlike ld or ld.so).  Note that we only set this
-     * so that LD_LIBRARY_PATH and run path settings are respected.
+     * search path after the original search.  This is so that
-     * Setting this search path overrides them (rather then being appended). */
+     * LD_LIBRARY_PATH and run path settings are respected.  Setting
-    if (advise_support) {
+     * this search path overrides them (rather than being
-        if (0 != (retval = opal_lt_dladvise_global(&advise))) {
+     * appended). */
-            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
+    j = 0;
-                           "opal_lt_dladvise_global", retval, opal_lt_dlerror());
+    while (searchpaths[j] != NULL) {
-            opal_lt_dladvise_destroy(&advise);
+        while (cudalibs[i] != NULL) {
-            return 1;
+            char *filename;
-        }
+            char *str;
        j = 0;
        while (searchpaths[j] != NULL) {
            /* Set explicit search path if entry is not empty string */
            if (strcmp("", searchpaths[j])) {
                opal_lt_dlsetsearchpath(searchpaths[j]);
            }
            i = 0;
            while (cudalibs[i] != NULL) {
                const char *str;
                libcuda_handle = opal_lt_dlopenadvise(cudalibs[i], advise);
                if (NULL == libcuda_handle) {
                    str = opal_lt_dlerror();
                    if (NULL != str) {
                        opal_argv_append(&errsize, &errmsgs, str);
                    } else {
                        opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL.");
                    }
                    opal_output_verbose(10, mca_common_cuda_output,
                                        "CUDA: Library open error: %s",
                                        errmsgs[errsize-1]);
                } else {
                    opal_output_verbose(10, mca_common_cuda_output,
                                        "CUDA: Library successfully opened %s",
                                        cudalibs[i]);
                    stage_one_init_passed = true;
                    break;
                }
                i++;
            }
            if (true == stage_one_init_passed) break; /* Break out of outer loop */
            j++;
        }
        opal_lt_dladvise_destroy(&advise);
    } else {
        j = 0;
        /* No lt_dladvise support.  This should rarely happen. */
        while (searchpaths[j] != NULL) {
            /* Set explicit search path if entry is not empty string */
            if (strcmp("", searchpaths[j])) {
                opal_lt_dlsetsearchpath(searchpaths[j]);
            }
            i = 0;
            while (cudalibs[i] != NULL) {
                const char *str;
                libcuda_handle = opal_lt_dlopen(cudalibs[i]);
                if (NULL == libcuda_handle) {
                    str = opal_lt_dlerror();
                    if (NULL != str) {
                        opal_argv_append(&errsize, &errmsgs, str);
                    } else {
                        opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL.");
                    }
-                    opal_output_verbose(10, mca_common_cuda_output,
+            /* If there's a non-empty search path, prepend it
-                                        "CUDA: Library open error: %s",
+               to the library filename */
-                                        errmsgs[errsize-1]);
+            if (strlen(searchpaths[j]) > 0) {
-
+                asprintf(&filename, "%s/%s", searchpaths[j], cudalibs[i]);
-                } else {
+            } else {
-                    opal_output_verbose(10, mca_common_cuda_output,
+                filename = strdup(cudalibs[i]);
                                        "CUDA: Library successfully opened %s",
                                        cudalibs[i]);
                    stage_one_init_passed = true;
                    break;
                }
                i++;
            }
-            if (true == stage_one_init_passed) break; /* Break out of outer loop */
+            if (NULL == filename) {
-            j++;
+                opal_show_help("help-mpi-common-cuda.txt", "No memory",
                               true, OPAL_PROC_MY_HOSTNAME);
                return 1;
            }
            retval = opal_dl_open(filename, false, false,
                                  &libcuda_handle, &str);
            if (OPAL_SUCCESS != retval || NULL == libcuda_handle) {
                if (NULL != str) {
                    opal_argv_append(&errsize, &errmsgs, str);
                } else {
                    opal_argv_append(&errsize, &errmsgs,
                                     "opal_dl_open() returned NULL.");
                }
                opal_output_verbose(10, mca_common_cuda_output,
                                    "CUDA: Library open error: %s",
                                    errmsgs[errsize-1]);
            } else {
                opal_output_verbose(10, mca_common_cuda_output,
                                    "CUDA: Library successfully opened %s",
                                    cudalibs[i]);
                stage_one_init_passed = true;
                break;
            }
            i++;
            free(filename);
        }
        if (true == stage_one_init_passed) {
            break; /* Break out of outer loop */
        }
        j++;
    }
    if (true != stage_one_init_passed) {
@ -916,8 +874,7 @@ void mca_common_cuda_fini(void)
        OBJ_DESTRUCT(&common_cuda_dtoh_lock);
        OBJ_DESTRUCT(&common_cuda_ipc_lock);
        if (NULL != libcuda_handle) {
-            opal_lt_dlclose(libcuda_handle);
+            opal_dl_close(libcuda_handle);
            opal_lt_dlexit();
        }
        opal_output_verbose(20, mca_common_cuda_output,
--- a/opal/mca/common/cuda/help-mpi-common-cuda.txt
+++ b/opal/mca/common/cuda/help-mpi-common-cuda.txt
@ -1,10 +1,11 @@
 # -*- text -*-
 #
 # Copyright (c) 2011-2015 NVIDIA.  All rights reserved.
 # Copyright (c) 2015 Cisco Systems, Inc.  All rights reserved.
 # $COPYRIGHT$
-# 
+#
 # Additional copyrights may follow
-# 
+#
 # $HEADER$
 #
 [cuCtxGetCurrent failed not initialized]
@ -152,14 +153,6 @@ Open MPI was compiled without dynamic library support (e.g., with the
 If you need CUDA support, reconfigure Open MPI with dynamic library support enabled.
 #
 [unknown ltdl error]
 While attempting to load the supporting libcuda.so library, an error
 occurred.  This really should rarely happen.  Please notify the Open
 MPI developers. 
   Function:     %s
   Return Value: %d
   Error string: %s
 #
 [dlopen failed]
 The library attempted to open the following supporting CUDA libraries, 
 but each of them failed.  CUDA-aware support is disabled.