diff --git a/ompi/mca/common/cuda/common_cuda.c b/ompi/mca/common/cuda/common_cuda.c index 2182ea8aa2..01e67eb63a 100644 --- a/ompi/mca/common/cuda/common_cuda.c +++ b/ompi/mca/common/cuda/common_cuda.c @@ -502,9 +502,11 @@ static int mca_common_cuda_load_libcuda(void) int retval, i, j; int advise_support = 1; bool loaded = false; - char *errs[NUMLIBS*SEARCHPATHS]; char *cudalibs[NUMLIBS] = {"libcuda.so.1", "libcuda.so"}; char *searchpaths[SEARCHPATHS] = {NULL, "/usr/lib64"}; + char **errmsgs = NULL; + char *errmsg = NULL; + int errsize; if (0 != (retval = opal_lt_dlinit())) { if (OPAL_ERR_NOT_SUPPORTED == retval) { @@ -528,10 +530,6 @@ static int mca_common_cuda_load_libcuda(void) } } - for (i = 0; i < NUMLIBS*SEARCHPATHS; i++) { - errs[i] = NULL; /* just to be safe */ - } - /* Now walk through all the potential names libcuda and find one * that works. If it does, all is good. If not, print out all * the messages about why things failed. This code was careful @@ -564,13 +562,13 @@ static int mca_common_cuda_load_libcuda(void) if (NULL == libcuda_handle) { str = opal_lt_dlerror(); if (NULL != str) { - errs[j*NUMLIBS + i] = strdup(str); + opal_argv_append(&errsize, &errmsgs, str); } else { - errs[j*NUMLIBS + i] = strdup("lt_dlerror() returned NULL."); + opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL."); } opal_output_verbose(10, mca_common_cuda_output, "CUDA: Library open error: %s", - errs[j*NUMLIBS + i]); + errmsgs[errsize-1]); } else { opal_output_verbose(10, mca_common_cuda_output, "CUDA: Library successfully opened %s", @@ -594,13 +592,15 @@ static int mca_common_cuda_load_libcuda(void) if (NULL == libcuda_handle) { str = opal_lt_dlerror(); if (NULL != str) { - errs[j*NUMLIBS + i] = strdup(str); + opal_argv_append(&errsize, &errmsgs, str); } else { - errs[j*NUMLIBS + i] = strdup("lt_dlerror() returned NULL."); + opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL."); } + opal_output_verbose(10, mca_common_cuda_output, "CUDA: Library open error: %s", - errs[j*NUMLIBS + i]); + errmsgs[errsize-1]); + } else { opal_output_verbose(10, mca_common_cuda_output, "CUDA: Library successfully opened %s", @@ -614,18 +614,13 @@ static int mca_common_cuda_load_libcuda(void) } if (loaded != true) { + errmsg = opal_argv_join(errmsgs, '\n'); opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true, - cudalibs[0], errs[0], cudalibs[1], errs[1], - cudalibs[2], errs[2], cudalibs[3], errs[3]); + errmsg); } - - /* Cleanup error messages. Need to do this after printing them. */ - for (i = 0; i < NUMLIBS*SEARCHPATHS; i++) { - if (NULL != errs[i]) { - free(errs[i]); - } - } - + opal_argv_free(errmsgs); + free(errmsg); + if (loaded != true) { return 1; } diff --git a/ompi/mca/common/cuda/help-mpi-common-cuda.txt b/ompi/mca/common/cuda/help-mpi-common-cuda.txt index c53a0dfba7..0ec70a089a 100644 --- a/ompi/mca/common/cuda/help-mpi-common-cuda.txt +++ b/ompi/mca/common/cuda/help-mpi-common-cuda.txt @@ -141,21 +141,11 @@ cause the program to abort. cuStreamCreate return value: %d Check the cuda.h file for what the return vale means. # -[dlopen disabled] -While trying to load the supporting libcuda.so library, an error was -detected. This error indicates that the Open MPI library was probably -configured with the --disable-dlopen flag. When the library is -configured in this way, CUDA support is disabled because CUDA support -depends on the ability to dynamically open libraries. Reconfigure -without the --disable-dlopen flag to get around this problem. -# -[dladvise disabled] -While trying to initialize the lt_dladvise structure, an error was -detected. This error indicates that the Open MPI library was -configured such that there is no support for the lt_dladvise -structure. This is needed for properly opening the libcuda library. -Look around for the OPAL_HAVE_LTDL_ADVISE macro and ensure that it -is defined as a 1. +[dlopen disabled] +Open MPI was compiled without dynamic library support (e.g., with the + --disable-dlopen flag), and therefore cannot utilize CUDA support. + +If you need CUDA support, reconfigure Open MPI with dynamic library support enabled. # [unknown ltdl error] While attempting to load the supporting libcuda.so library, an error @@ -166,15 +156,9 @@ MPI developers. Error string: %s # [dlopen failed] -The library attempted to open the supporting CUDA libraries but failed. - Library attempted: %s - Error string: %s - Library attempted: %s - Error string: %s - Library attempted: %s - Error string: %s - Library attempted: %s - Error string: %s +The library attempted to open the following supporting CUDA libraries, +but each of them failed. +%s # [dlsym failed] An error occurred while trying to map in the address of a function. diff --git a/ompi/mca/pml/ob1/Makefile.am b/ompi/mca/pml/ob1/Makefile.am index b0c103a425..6042e2df61 100644 --- a/ompi/mca/pml/ob1/Makefile.am +++ b/ompi/mca/pml/ob1/Makefile.am @@ -66,10 +66,6 @@ mcacomponentdir = $(pkglibdir) mcacomponent_LTLIBRARIES = $(component_install) mca_pml_ob1_la_SOURCES = $(ob1_sources) mca_pml_ob1_la_LDFLAGS = -module -avoid-version -#if MCA_ompi_cuda_support -#mca_pml_ob1_la_LIBADD = \ -# $(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la -#endif noinst_LTLIBRARIES = $(component_noinst) libmca_pml_ob1_la_SOURCES = $(ob1_sources)