cuda: convert to opal_dl interface
Этот коммит содержится в:
родитель
c683500a29
Коммит
1995f6beba
@ -10,7 +10,7 @@ dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|||||||
dnl University of Stuttgart. All rights reserved.
|
dnl University of Stuttgart. All rights reserved.
|
||||||
dnl Copyright (c) 2004-2005 The Regents of the University of California.
|
dnl Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
dnl All rights reserved.
|
dnl All rights reserved.
|
||||||
dnl Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved.
|
dnl Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
|
||||||
dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||||
dnl Copyright (c) 2009 IBM Corporation. All rights reserved.
|
dnl Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||||
dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights
|
dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights
|
||||||
@ -72,10 +72,12 @@ AS_IF([test "$with_cuda" = "no" || test "x$with_cuda" = "x"],
|
|||||||
opal_cuda_incdir="$with_cuda/include"
|
opal_cuda_incdir="$with_cuda/include"
|
||||||
AC_MSG_RESULT([found ($opal_cuda_incdir/cuda.h)])])])])])
|
AC_MSG_RESULT([found ($opal_cuda_incdir/cuda.h)])])])])])
|
||||||
|
|
||||||
# We cannot have CUDA support without dlopen support. Check for that and
|
dnl We cannot have CUDA support without dlopen support. HOWEVER, at
|
||||||
# error out if the user has also set --disable-dlopen.
|
dnl this point in configure, we can't know whether the DL framework
|
||||||
AS_IF([test "$enable_dlopen" = "no" && test "$opal_check_cuda_happy" = "yes"],
|
dnl has been configured or not yet (it likely hasn't, since CUDA is a
|
||||||
[AC_MSG_ERROR([--with-cuda cannot be used with --disable-dlopen. Remove one of them and reconfigure.])])
|
dnl common framework, and likely configured first). So we have to
|
||||||
|
dnl defer this check until later (see the OPAL_CHECK_CUDA_AFTER_OPAL_DL m4
|
||||||
|
dnl macro, below). :-(
|
||||||
|
|
||||||
# If we have CUDA support, check to see if we have CUDA 4.1 support
|
# If we have CUDA support, check to see if we have CUDA 4.1 support
|
||||||
AS_IF([test "$opal_check_cuda_happy"="yes"],
|
AS_IF([test "$opal_check_cuda_happy"="yes"],
|
||||||
@ -142,3 +144,21 @@ AC_DEFINE_UNQUOTED([OPAL_CUDA_GDR_SUPPORT],$CUDA_VERSION_60_OR_GREATER,
|
|||||||
[Whether we have CUDA GDR support available])
|
[Whether we have CUDA GDR support available])
|
||||||
|
|
||||||
])
|
])
|
||||||
|
|
||||||
|
dnl
|
||||||
|
dnl CUDA support requires DL support (it dynamically opens the CUDA
|
||||||
|
dnl library at run time). But we do not check for OPAL DL support
|
||||||
|
dnl until after the initial OPAL_CHECK_CUDA is called. So put the
|
||||||
|
dnl CUDA+DL check in a separate macro that can be called after the DL MCA
|
||||||
|
dnl framework checks in the top-level configure.ac.
|
||||||
|
dnl
|
||||||
|
AC_DEFUN([OPAL_CHECK_CUDA_AFTER_OPAL_DL],[
|
||||||
|
|
||||||
|
# We cannot have CUDA support without OPAL DL support. Error out
|
||||||
|
# if the user wants CUDA but we do not have OPAL DL support.
|
||||||
|
AS_IF([test $OPAL_HAVE_DL_SUPPORT -eq 0 && \
|
||||||
|
test "$opal_check_cuda_happy" = "yes"],
|
||||||
|
[AC_MSG_WARN([--with-cuda was specified, but dlopen support is disabled.])
|
||||||
|
AC_MSG_WARN([You must reconfigure Open MPI with dlopen ("dl") support.])
|
||||||
|
AC_MSG_ERROR([Cannot continue.])])
|
||||||
|
])
|
||||||
|
12
configure.ac
12
configure.ac
@ -1157,6 +1157,18 @@ m4_ifdef([project_ompi], [OMPI_REQUIRE_ENDPOINT_TAG_FINI])
|
|||||||
# checkpoint results
|
# checkpoint results
|
||||||
AC_CACHE_SAVE
|
AC_CACHE_SAVE
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# CUDA: part two
|
||||||
|
##################################
|
||||||
|
|
||||||
|
# This is somewhat gross to have a configure check for a common MCA
|
||||||
|
# component outside of the normal MCA checks, but this check must come
|
||||||
|
# after the opal DL MCA checks have been done. Someday this could perhaps
|
||||||
|
# be done better by having some kind of "run this check at the end of
|
||||||
|
# all other MCA checks" hook...?
|
||||||
|
|
||||||
|
OPAL_CHECK_CUDA_AFTER_OPAL_DL
|
||||||
|
|
||||||
##################################
|
##################################
|
||||||
# MPI Extended Interfaces
|
# MPI Extended Interfaces
|
||||||
##################################
|
##################################
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved.
|
* Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved.
|
||||||
|
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -33,13 +34,13 @@
|
|||||||
#include "opal/datatype/opal_convertor.h"
|
#include "opal/datatype/opal_convertor.h"
|
||||||
#include "opal/datatype/opal_datatype_cuda.h"
|
#include "opal/datatype/opal_datatype_cuda.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
#include "opal/util/lt_interface.h"
|
|
||||||
#include "opal/util/show_help.h"
|
#include "opal/util/show_help.h"
|
||||||
#include "opal/util/proc.h"
|
#include "opal/util/proc.h"
|
||||||
|
|
||||||
#include "opal/mca/mpool/base/base.h"
|
#include "opal/mca/mpool/base/base.h"
|
||||||
#include "opal/runtime/opal_params.h"
|
#include "opal/runtime/opal_params.h"
|
||||||
#include "opal/mca/timer/base/base.h"
|
#include "opal/mca/timer/base/base.h"
|
||||||
|
#include "opal/mca/dl/base/base.h"
|
||||||
|
|
||||||
#include "common_cuda.h"
|
#include "common_cuda.h"
|
||||||
|
|
||||||
@ -55,12 +56,15 @@
|
|||||||
|
|
||||||
#define OPAL_CUDA_DLSYM(libhandle, funcName) \
|
#define OPAL_CUDA_DLSYM(libhandle, funcName) \
|
||||||
do { \
|
do { \
|
||||||
*(void **)(&cuFunc.funcName) = opal_lt_dlsym(libhandle, STRINGIFY(funcName)); \
|
char *err_msg; \
|
||||||
if (NULL == cuFunc.funcName) { \
|
void *ptr; \
|
||||||
|
if (OPAL_SUCCESS != \
|
||||||
|
opal_dl_lookup(libhandle, STRINGIFY(funcName), &ptr, &err_msg)) { \
|
||||||
opal_show_help("help-mpi-common-cuda.txt", "dlsym failed", true, \
|
opal_show_help("help-mpi-common-cuda.txt", "dlsym failed", true, \
|
||||||
STRINGIFY(funcName), opal_lt_dlerror()); \
|
STRINGIFY(funcName), err_msg); \
|
||||||
return 1; \
|
return 1; \
|
||||||
} else { \
|
} else { \
|
||||||
|
*(void **)(&cuFunc.funcName) = ptr; \
|
||||||
opal_output_verbose(15, mca_common_cuda_output, \
|
opal_output_verbose(15, mca_common_cuda_output, \
|
||||||
"CUDA: successful dlsym of %s", \
|
"CUDA: successful dlsym of %s", \
|
||||||
STRINGIFY(funcName)); \
|
STRINGIFY(funcName)); \
|
||||||
@ -185,7 +189,7 @@ static int cuda_event_dtoh_most = 0;
|
|||||||
static int cuda_event_htod_most = 0;
|
static int cuda_event_htod_most = 0;
|
||||||
|
|
||||||
/* Handle to libcuda.so */
|
/* Handle to libcuda.so */
|
||||||
opal_lt_dlhandle libcuda_handle = NULL;
|
opal_dl_handle_t *libcuda_handle = NULL;
|
||||||
|
|
||||||
/* Unused variable that we register at init time and unregister at fini time.
|
/* Unused variable that we register at init time and unregister at fini time.
|
||||||
* This is used to detect if user has done a device reset prior to MPI_Finalize.
|
* This is used to detect if user has done a device reset prior to MPI_Finalize.
|
||||||
@ -233,9 +237,7 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ;
|
|||||||
*/
|
*/
|
||||||
int mca_common_cuda_stage_one_init(void)
|
int mca_common_cuda_stage_one_init(void)
|
||||||
{
|
{
|
||||||
opal_lt_dladvise advise;
|
|
||||||
int retval, i, j;
|
int retval, i, j;
|
||||||
int advise_support = 1;
|
|
||||||
char *cudalibs[] = {"libcuda.so.1", "libcuda.dylib", NULL};
|
char *cudalibs[] = {"libcuda.so.1", "libcuda.dylib", NULL};
|
||||||
char *searchpaths[] = {"", "/usr/lib64", NULL};
|
char *searchpaths[] = {"", "/usr/lib64", NULL};
|
||||||
char **errmsgs = NULL;
|
char **errmsgs = NULL;
|
||||||
@ -339,120 +341,76 @@ int mca_common_cuda_stage_one_init(void)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (0 != (retval = opal_lt_dlinit())) {
|
if (!OPAL_HAVE_DL_SUPPORT) {
|
||||||
if (OPAL_ERR_NOT_SUPPORTED == retval) {
|
opal_show_help("help-mpi-common-cuda.txt", "dlopen disabled", true);
|
||||||
opal_show_help("help-mpi-common-cuda.txt", "dlopen disabled", true);
|
|
||||||
} else {
|
|
||||||
opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
|
|
||||||
"opal_lt_dlinit", retval, opal_lt_dlerror());
|
|
||||||
}
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Initialize the lt_dladvise structure. If this does not work, we can
|
|
||||||
* proceed without the support. Things should still work. */
|
|
||||||
if (0 != (retval = opal_lt_dladvise_init(&advise))) {
|
|
||||||
if (OPAL_ERR_NOT_SUPPORTED == retval) {
|
|
||||||
advise_support = 0;
|
|
||||||
} else {
|
|
||||||
opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
|
|
||||||
"opal_lt_dladvise_init", retval, opal_lt_dlerror());
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Now walk through all the potential names libcuda and find one
|
/* Now walk through all the potential names libcuda and find one
|
||||||
* that works. If it does, all is good. If not, print out all
|
* that works. If it does, all is good. If not, print out all
|
||||||
* the messages about why things failed. This code was careful
|
* the messages about why things failed. This code was careful
|
||||||
* to try and save away all error messages if the loading ultimately
|
* to try and save away all error messages if the loading ultimately
|
||||||
* failed to help with debugging.
|
* failed to help with debugging.
|
||||||
|
*
|
||||||
* NOTE: On the first loop we just utilize the default loading
|
* NOTE: On the first loop we just utilize the default loading
|
||||||
* paths from the system. For the second loop, set /usr/lib64 to
|
* paths from the system. For the second loop, set /usr/lib64 to
|
||||||
* the search path and try again. This is done to handle the case
|
* the search path and try again. This is done to handle the case
|
||||||
* where we have both 32 and 64 bit libcuda.so libraries installed.
|
* where we have both 32 and 64 bit libcuda.so libraries
|
||||||
* Even when running in 64-bit mode, the /usr/lib directory
|
* installed. Even when running in 64-bit mode, the /usr/lib
|
||||||
* is searched first and we may find a 32-bit libcuda.so.1 library.
|
* directory is searched first and we may find a 32-bit
|
||||||
* Loading of this library will fail as libtool does not handle having
|
* libcuda.so.1 library. Loading of this library will fail as the
|
||||||
* the wrong ABI in the search path (unlike ld or ld.so). Note that
|
* OPAL DL framework does not handle having the wrong ABI in the
|
||||||
* we only set this search path after the original search. This is
|
* search path (unlike ld or ld.so). Note that we only set this
|
||||||
* so that LD_LIBRARY_PATH and run path settings are respected.
|
* search path after the original search. This is so that
|
||||||
* Setting this search path overrides them (rather then being appended). */
|
* LD_LIBRARY_PATH and run path settings are respected. Setting
|
||||||
if (advise_support) {
|
* this search path overrides them (rather than being
|
||||||
if (0 != (retval = opal_lt_dladvise_global(&advise))) {
|
* appended). */
|
||||||
opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
|
j = 0;
|
||||||
"opal_lt_dladvise_global", retval, opal_lt_dlerror());
|
while (searchpaths[j] != NULL) {
|
||||||
opal_lt_dladvise_destroy(&advise);
|
while (cudalibs[i] != NULL) {
|
||||||
return 1;
|
char *filename;
|
||||||
}
|
char *str;
|
||||||
j = 0;
|
|
||||||
while (searchpaths[j] != NULL) {
|
|
||||||
/* Set explicit search path if entry is not empty string */
|
|
||||||
if (strcmp("", searchpaths[j])) {
|
|
||||||
opal_lt_dlsetsearchpath(searchpaths[j]);
|
|
||||||
}
|
|
||||||
i = 0;
|
|
||||||
while (cudalibs[i] != NULL) {
|
|
||||||
const char *str;
|
|
||||||
libcuda_handle = opal_lt_dlopenadvise(cudalibs[i], advise);
|
|
||||||
if (NULL == libcuda_handle) {
|
|
||||||
str = opal_lt_dlerror();
|
|
||||||
if (NULL != str) {
|
|
||||||
opal_argv_append(&errsize, &errmsgs, str);
|
|
||||||
} else {
|
|
||||||
opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL.");
|
|
||||||
}
|
|
||||||
opal_output_verbose(10, mca_common_cuda_output,
|
|
||||||
"CUDA: Library open error: %s",
|
|
||||||
errmsgs[errsize-1]);
|
|
||||||
} else {
|
|
||||||
opal_output_verbose(10, mca_common_cuda_output,
|
|
||||||
"CUDA: Library successfully opened %s",
|
|
||||||
cudalibs[i]);
|
|
||||||
stage_one_init_passed = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
if (true == stage_one_init_passed) break; /* Break out of outer loop */
|
|
||||||
j++;
|
|
||||||
}
|
|
||||||
opal_lt_dladvise_destroy(&advise);
|
|
||||||
} else {
|
|
||||||
j = 0;
|
|
||||||
/* No lt_dladvise support. This should rarely happen. */
|
|
||||||
while (searchpaths[j] != NULL) {
|
|
||||||
/* Set explicit search path if entry is not empty string */
|
|
||||||
if (strcmp("", searchpaths[j])) {
|
|
||||||
opal_lt_dlsetsearchpath(searchpaths[j]);
|
|
||||||
}
|
|
||||||
i = 0;
|
|
||||||
while (cudalibs[i] != NULL) {
|
|
||||||
const char *str;
|
|
||||||
libcuda_handle = opal_lt_dlopen(cudalibs[i]);
|
|
||||||
if (NULL == libcuda_handle) {
|
|
||||||
str = opal_lt_dlerror();
|
|
||||||
if (NULL != str) {
|
|
||||||
opal_argv_append(&errsize, &errmsgs, str);
|
|
||||||
} else {
|
|
||||||
opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL.");
|
|
||||||
}
|
|
||||||
|
|
||||||
opal_output_verbose(10, mca_common_cuda_output,
|
/* If there's a non-empty search path, prepend it
|
||||||
"CUDA: Library open error: %s",
|
to the library filename */
|
||||||
errmsgs[errsize-1]);
|
if (strlen(searchpaths[j]) > 0) {
|
||||||
|
asprintf(&filename, "%s/%s", searchpaths[j], cudalibs[i]);
|
||||||
} else {
|
} else {
|
||||||
opal_output_verbose(10, mca_common_cuda_output,
|
filename = strdup(cudalibs[i]);
|
||||||
"CUDA: Library successfully opened %s",
|
|
||||||
cudalibs[i]);
|
|
||||||
stage_one_init_passed = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
i++;
|
|
||||||
}
|
}
|
||||||
if (true == stage_one_init_passed) break; /* Break out of outer loop */
|
if (NULL == filename) {
|
||||||
j++;
|
opal_show_help("help-mpi-common-cuda.txt", "No memory",
|
||||||
|
true, OPAL_PROC_MY_HOSTNAME);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
retval = opal_dl_open(filename, false, false,
|
||||||
|
&libcuda_handle, &str);
|
||||||
|
if (OPAL_SUCCESS != retval || NULL == libcuda_handle) {
|
||||||
|
if (NULL != str) {
|
||||||
|
opal_argv_append(&errsize, &errmsgs, str);
|
||||||
|
} else {
|
||||||
|
opal_argv_append(&errsize, &errmsgs,
|
||||||
|
"opal_dl_open() returned NULL.");
|
||||||
|
}
|
||||||
|
opal_output_verbose(10, mca_common_cuda_output,
|
||||||
|
"CUDA: Library open error: %s",
|
||||||
|
errmsgs[errsize-1]);
|
||||||
|
} else {
|
||||||
|
opal_output_verbose(10, mca_common_cuda_output,
|
||||||
|
"CUDA: Library successfully opened %s",
|
||||||
|
cudalibs[i]);
|
||||||
|
stage_one_init_passed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
|
||||||
|
free(filename);
|
||||||
}
|
}
|
||||||
|
if (true == stage_one_init_passed) {
|
||||||
|
break; /* Break out of outer loop */
|
||||||
|
}
|
||||||
|
j++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (true != stage_one_init_passed) {
|
if (true != stage_one_init_passed) {
|
||||||
@ -916,8 +874,7 @@ void mca_common_cuda_fini(void)
|
|||||||
OBJ_DESTRUCT(&common_cuda_dtoh_lock);
|
OBJ_DESTRUCT(&common_cuda_dtoh_lock);
|
||||||
OBJ_DESTRUCT(&common_cuda_ipc_lock);
|
OBJ_DESTRUCT(&common_cuda_ipc_lock);
|
||||||
if (NULL != libcuda_handle) {
|
if (NULL != libcuda_handle) {
|
||||||
opal_lt_dlclose(libcuda_handle);
|
opal_dl_close(libcuda_handle);
|
||||||
opal_lt_dlexit();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
opal_output_verbose(20, mca_common_cuda_output,
|
opal_output_verbose(20, mca_common_cuda_output,
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
# -*- text -*-
|
# -*- text -*-
|
||||||
#
|
#
|
||||||
# Copyright (c) 2011-2015 NVIDIA. All rights reserved.
|
# Copyright (c) 2011-2015 NVIDIA. All rights reserved.
|
||||||
|
# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||||
# $COPYRIGHT$
|
# $COPYRIGHT$
|
||||||
#
|
#
|
||||||
# Additional copyrights may follow
|
# Additional copyrights may follow
|
||||||
#
|
#
|
||||||
# $HEADER$
|
# $HEADER$
|
||||||
#
|
#
|
||||||
[cuCtxGetCurrent failed not initialized]
|
[cuCtxGetCurrent failed not initialized]
|
||||||
@ -152,14 +153,6 @@ Open MPI was compiled without dynamic library support (e.g., with the
|
|||||||
|
|
||||||
If you need CUDA support, reconfigure Open MPI with dynamic library support enabled.
|
If you need CUDA support, reconfigure Open MPI with dynamic library support enabled.
|
||||||
#
|
#
|
||||||
[unknown ltdl error]
|
|
||||||
While attempting to load the supporting libcuda.so library, an error
|
|
||||||
occurred. This really should rarely happen. Please notify the Open
|
|
||||||
MPI developers.
|
|
||||||
Function: %s
|
|
||||||
Return Value: %d
|
|
||||||
Error string: %s
|
|
||||||
#
|
|
||||||
[dlopen failed]
|
[dlopen failed]
|
||||||
The library attempted to open the following supporting CUDA libraries,
|
The library attempted to open the following supporting CUDA libraries,
|
||||||
but each of them failed. CUDA-aware support is disabled.
|
but each of them failed. CUDA-aware support is disabled.
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user