Refactor some of the initialization code.
This commit was SVN r29009.
Parent: f7391eca23
Commit: cd72024a3c
@@ -88,12 +88,16 @@ struct cudaFunctionTable {
     int (*cuIpcCloseMemHandle)(CUdeviceptr);
     int (*cuIpcGetMemHandle)(CUipcMemHandle*, CUdeviceptr);
 #endif /* OMPI_CUDA_SUPPORT_41 */
+    int (*cuCtxGetDevice)(CUdevice *);
+    int (*cuDeviceCanAccessPeer)(int *, CUdevice, CUdevice);
+    int (*cuDeviceGet)(CUdevice *, int);
 } cudaFunctionTable;
 typedef struct cudaFunctionTable cudaFunctionTable_t;
 cudaFunctionTable_t cuFunc;
 
+static bool stage_one_init_complete = false;
+static bool stage_three_init_complete = false;
 static bool common_cuda_initialized = false;
-static bool common_cuda_init_function_added = false;
 static int mca_common_cuda_verbose;
 static int mca_common_cuda_output = 0;
 static bool mca_common_cuda_enabled = false;
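
The cuFunc table above holds function pointers that are resolved at run time by opening the CUDA driver library and looking each symbol up by name. The following standalone sketch illustrates that pattern with plain <dlfcn.h> rather than the opal_lt_dl wrappers this commit uses; fn_table, load_sym and CUdevice_stub are illustrative names only, and the stub typedef stands in for CUdevice so the sketch builds without cuda.h.

    /* Illustrative only -- not Open MPI code.  Build with: cc sketch.c -ldl */
    #include <dlfcn.h>
    #include <stdio.h>

    typedef int CUdevice_stub;                        /* stand-in for CUdevice */

    static struct {
        int (*cuDeviceGet)(CUdevice_stub *, int);     /* same shape as the table entry */
    } fn_table;

    static int load_sym(void *handle, const char *name, void **slot)
    {
        *slot = dlsym(handle, name);                  /* resolve one driver symbol */
        if (NULL == *slot) {
            fprintf(stderr, "dlsym(%s): %s\n", name, dlerror());
            return 1;
        }
        return 0;
    }

    int main(void)
    {
        void *handle = dlopen("libcuda.so.1", RTLD_NOW | RTLD_GLOBAL);
        if (NULL == handle) {
            fprintf(stderr, "dlopen: %s\n", dlerror());   /* support would be disabled */
            return 1;
        }
        if (0 != load_sym(handle, "cuDeviceGet", (void **)&fn_table.cuDeviceGet)) {
            return 1;
        }
        /* fn_table.cuDeviceGet(&dev, 0) is now callable, just like cuFunc.X above. */
        dlclose(handle);
        return 0;
    }
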
@@ -120,10 +124,10 @@ struct common_cuda_mem_regs_t {
 };
 typedef struct common_cuda_mem_regs_t common_cuda_mem_regs_t;
 OBJ_CLASS_DECLARATION(common_cuda_mem_regs_t);
-OBJ_CLASS_INSTANCE( common_cuda_mem_regs_t,
+OBJ_CLASS_INSTANCE(common_cuda_mem_regs_t,
                    opal_list_item_t,
                    NULL,
-                   NULL );
+                   NULL);
 
 #if OMPI_CUDA_SUPPORT_41
 static int mca_common_cuda_async = 1;
@@ -167,7 +171,6 @@ static double accum;
 static float mydifftime(struct timespec ts_start, struct timespec ts_end);
 #endif /* CUDA_COMMON_TIMING */
 
-static int mca_common_cuda_load_libcuda(void);
 /* These functions are typically unused in the optimized builds. */
 static void cuda_dump_evthandle(int, void *, char *) __opal_attribute_unused__ ;
 static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ;
@@ -181,15 +184,54 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ;
 
 #endif /* OMPI_CUDA_SUPPORT_41 */
 
-int mca_common_cuda_register_mca_variables(void)
-{
-    static bool registered = false;
-
-    if (registered) {
-        return OMPI_SUCCESS;
+/**
+ * This function is registered with the OPAL CUDA support. In that way,
+ * these function pointers will be loaded into the OPAL CUDA code when
+ * the first convertor is initialized. This does not trigger any CUDA
+ * specific initialization as this may just be a host buffer that is
+ * triggering this call.
+ */
+static int mca_common_cuda_init(opal_common_cuda_function_table_t *ftable)
+{
+    if (OPAL_UNLIKELY(!ompi_mpi_cuda_support)) {
+        return OMPI_ERROR;
     }
 
-    registered = true;
+    ftable->gpu_is_gpu_buffer = &mca_common_cuda_is_gpu_buffer;
+    ftable->gpu_cu_memcpy_async = &mca_common_cuda_cu_memcpy_async;
+    ftable->gpu_cu_memcpy = &mca_common_cuda_cu_memcpy;
+    ftable->gpu_memmove = &mca_common_cuda_memmove;
+
+    opal_output_verbose(30, mca_common_cuda_output,
+                        "CUDA: support functions initialized");
+    return OMPI_SUCCESS;
+}
+
+
+/**
+ * This is the first stage of initialization. This function is
+ * triggered when there are memory registration requests from various
+ * BTLs. This function will register some mca variables and then open
+ * and load the symbols needed from the CUDA driver library. Look for
+ * the SONAME of the library which is libcuda.so.1. In most cases,
+ * this will result in the library found. However, there are some
+ * setups that require the extra steps for searching. Any failure
+ * will result in this initialization failing and status will be set
+ * showing that.
+ */
+static int mca_common_cuda_stage_one_init(void)
+{
+    opal_lt_dladvise advise;
+    int retval, i, j;
+    int advise_support = 1;
+    char *cudalibs[] = {"libcuda.so.1", NULL};
+    char *searchpaths[] = {"", "/usr/lib64", NULL};
+    char **errmsgs = NULL;
+    char *errmsg = NULL;
+    int errsize;
+    bool stage_one_init_passed = false;
+
+    stage_one_init_complete = true;
 
     /* Set different levels of verbosity in the cuda related code. */
     mca_common_cuda_verbose = 0;
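
The new mca_common_cuda_init() above does nothing heavy: it only fills in a table of callbacks handed to it by the OPAL datatype layer, deferring driver loading and context checks to the later stages. Below is a minimal, self-contained sketch of that "fill in a caller-supplied function table" pattern; gpu_table_t, my_is_gpu_buffer and gpu_support_init are hypothetical names, not OPAL/OMPI symbols.

    /* Hypothetical sketch of the callback-table hand-off, not the OPAL API. */
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct {
        bool (*is_gpu_buffer)(const void *buf);
    } gpu_table_t;

    static bool my_is_gpu_buffer(const void *buf)
    {
        (void)buf;
        return false;                    /* placeholder: real code queries the driver */
    }

    /* Called lazily by the consumer, e.g. when the first convertor is set up. */
    static int gpu_support_init(gpu_table_t *table)
    {
        table->is_gpu_buffer = &my_is_gpu_buffer;
        return 0;
    }

    int main(void)
    {
        gpu_table_t table = {0};
        if (0 == gpu_support_init(&table)) {
            printf("is_gpu_buffer -> %d\n", table.is_gpu_buffer((void *)0));
        }
        return 0;
    }
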
@@ -241,48 +283,198 @@ int mca_common_cuda_register_mca_variables(void)
                                  &cuda_event_max);
 #endif /* OMPI_CUDA_SUPPORT_41 */
 
-    return OMPI_SUCCESS;
+    mca_common_cuda_output = opal_output_open(NULL);
+    opal_output_set_verbosity(mca_common_cuda_output, mca_common_cuda_verbose);
+
+    if (0 != (retval = opal_lt_dlinit())) {
+        if (OPAL_ERR_NOT_SUPPORTED == retval) {
+            opal_show_help("help-mpi-common-cuda.txt", "dlopen disabled", true);
+        } else {
+            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
+                           "opal_lt_dlinit", retval, opal_lt_dlerror());
+        }
+        return 1;
+    }
+
+    /* Initialize the lt_dladvise structure. If this does not work, we can
+     * proceed without the support. Things should still work. */
+    if (0 != (retval = opal_lt_dladvise_init(&advise))) {
+        if (OPAL_ERR_NOT_SUPPORTED == retval) {
+            advise_support = 0;
+        } else {
+            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
+                           "opal_lt_dladvise_init", retval, opal_lt_dlerror());
+            return 1;
+        }
+    }
+
+    /* Now walk through all the potential names libcuda and find one
+     * that works. If it does, all is good. If not, print out all
+     * the messages about why things failed. This code was careful
+     * to try and save away all error messages if the loading ultimately
+     * failed to help with debugging.
+     * NOTE: On the first loop we just utilize the default loading
+     * paths from the system. For the second loop, set /usr/lib64 to
+     * the search path and try again. This is done to handle the case
+     * where we have both 32 and 64 bit libcuda.so libraries installed.
+     * Even when running in 64-bit mode, the /usr/lib direcotry
+     * is searched first and we may find a 32-bit libcuda.so.1 library.
+     * Loading of this library will fail as libtool does not handle having
+     * the wrong ABI in the search path (unlike ld or ld.so). Note that
+     * we only set this search path after the original search. This is
+     * so that LD_LIBRARY_PATH and run path settings are respected.
+     * Setting this search path overrides them (rather then being appended). */
+    if (advise_support) {
+        if (0 != (retval = opal_lt_dladvise_global(&advise))) {
+            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
+                           "opal_lt_dladvise_global", retval, opal_lt_dlerror());
+            opal_lt_dladvise_destroy(&advise);
+            return 1;
+        }
+        j = 0;
+        while (searchpaths[j] != NULL) {
+            /* Set explicit search path if entry is not empty string */
+            if (strcmp("", searchpaths[j])) {
+                opal_lt_dlsetsearchpath(searchpaths[j]);
+            }
+            i = 0;
+            while (cudalibs[i] != NULL) {
+                const char *str;
+                libcuda_handle = opal_lt_dlopenadvise(cudalibs[i], advise);
+                if (NULL == libcuda_handle) {
+                    str = opal_lt_dlerror();
+                    if (NULL != str) {
+                        opal_argv_append(&errsize, &errmsgs, str);
+                    } else {
+                        opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL.");
+                    }
+                    opal_output_verbose(10, mca_common_cuda_output,
+                                        "CUDA: Library open error: %s",
+                                        errmsgs[errsize-1]);
+                } else {
+                    opal_output_verbose(10, mca_common_cuda_output,
+                                        "CUDA: Library successfully opened %s",
+                                        cudalibs[i]);
+                    stage_one_init_passed = true;
+                    break;
+                }
+                i++;
+            }
+            if (true == stage_one_init_passed) break; /* Break out of outer loop */
+            j++;
+        }
+        opal_lt_dladvise_destroy(&advise);
+    } else {
+        j = 0;
+        /* No lt_dladvise support. This should rarely happen. */
+        while (searchpaths[j] != NULL) {
+            /* Set explicit search path if entry is not empty string */
+            if (strcmp("", searchpaths[j])) {
+                opal_lt_dlsetsearchpath(searchpaths[j]);
+            }
+            i = 0;
+            while (cudalibs[i] != NULL) {
+                const char *str;
+                libcuda_handle = opal_lt_dlopen(cudalibs[i]);
+                if (NULL == libcuda_handle) {
+                    str = opal_lt_dlerror();
+                    if (NULL != str) {
+                        opal_argv_append(&errsize, &errmsgs, str);
+                    } else {
+                        opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL.");
+                    }
+
+                    opal_output_verbose(10, mca_common_cuda_output,
+                                        "CUDA: Library open error: %s",
+                                        errmsgs[errsize-1]);
+
+                } else {
+                    opal_output_verbose(10, mca_common_cuda_output,
+                                        "CUDA: Library successfully opened %s",
+                                        cudalibs[i]);
+                    stage_one_init_passed = true;
+                    break;
+                }
+                i++;
+            }
+            if (true == stage_one_init_passed) break; /* Break out of outer loop */
+            j++;
+        }
+    }
+
+    if (true != stage_one_init_passed) {
+        errmsg = opal_argv_join(errmsgs, '\n');
+        opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true,
+                       errmsg);
+    }
+    opal_argv_free(errmsgs);
+    free(errmsg);
+
+    if (true != stage_one_init_passed) {
+        return 1;
+    }
+
+    /* Map in the functions that we need. Note that if there is an error
+     * the macro OMPI_CUDA_DLSYM will print an error and call return. */
+    OMPI_CUDA_DLSYM(libcuda_handle, cuStreamCreate);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuCtxGetCurrent);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuEventCreate);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuEventRecord);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuMemHostRegister);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuMemHostUnregister);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuPointerGetAttribute);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuEventQuery);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuEventDestroy);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuStreamWaitEvent);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuMemcpyAsync);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuMemcpy);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuMemFree);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuMemAlloc);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuMemGetAddressRange);
+#if OMPI_CUDA_SUPPORT_41
+    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcGetEventHandle);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcOpenEventHandle);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcOpenMemHandle);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcCloseMemHandle);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcGetMemHandle);
+#endif /* OMPI_CUDA_SUPPORT_41 */
+    OMPI_CUDA_DLSYM(libcuda_handle, cuCtxGetDevice);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuDeviceCanAccessPeer);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuDeviceGet);
+    return 0;
 }
 
+
 /**
- * This function is registered with the OPAL CUDA support. In that way,
- * we will complete initialization when OPAL detects the first GPU memory
- * access. In the case that no GPU memory access happens, then this function
- * never gets called.
+ * This is the last phase of initialization. This is triggered when we examine
+ * a buffer pointer and determine it is a GPU buffer. We then assume the user
+ * has selected their GPU and we can go ahead with all the CUDA related
+ * initializations.
  */
-static int mca_common_cuda_init(opal_common_cuda_function_table_t *ftable)
+static int mca_common_cuda_stage_three_init(void)
 {
     int i, s;
     CUresult res;
     CUcontext cuContext;
     common_cuda_mem_regs_t *mem_reg;
 
+    stage_three_init_complete = true;
+
+    opal_output_verbose(20, mca_common_cuda_output,
+                        "CUDA: entering stage three init");
+
     if (OPAL_UNLIKELY(!ompi_mpi_cuda_support)) {
+        opal_output_verbose(20, mca_common_cuda_output,
+                            "CUDA: No mpi cuda support, exiting stage three init");
         return OMPI_ERROR;
     }
 
     if (OPAL_LIKELY(common_cuda_initialized)) {
+        opal_output_verbose(20, mca_common_cuda_output,
+                            "CUDA: Stage three already complete, exiting stage three init");
         return OMPI_SUCCESS;
     }
 
-    /* Make sure this component's variables are registered */
-    mca_common_cuda_register_mca_variables();
-
-    ftable->gpu_is_gpu_buffer = &mca_common_cuda_is_gpu_buffer;
-    ftable->gpu_cu_memcpy_async = &mca_common_cuda_cu_memcpy_async;
-    ftable->gpu_cu_memcpy = &mca_common_cuda_cu_memcpy;
-    ftable->gpu_memmove = &mca_common_cuda_memmove;
-
-    mca_common_cuda_output = opal_output_open(NULL);
-    opal_output_set_verbosity(mca_common_cuda_output, mca_common_cuda_verbose);
-
-    /* If we cannot load the libary, then disable support */
-    if (0 != mca_common_cuda_load_libcuda()) {
-        common_cuda_initialized = true;
-        ompi_mpi_cuda_support = 0;
-        return OMPI_ERROR;
-    }
-
     /* Check to see if this process is running in a CUDA context. If
      * so, all is good. If not, then disable registration of memory. */
     res = cuFunc.cuCtxGetCurrent(&cuContext);
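
The nested loops added above try each candidate library name against each search location and save every error message so that a single help message can be shown if nothing loads. A condensed, standalone sketch of that search strategy using plain dlopen() follows; the ltdl-specific pieces (opal_lt_dladvise, opal_lt_dlsetsearchpath, opal_argv_*) are deliberately left out, so this is an illustration of the idea, not the actual implementation.

    /* Illustrative only: try each library name in each search location and
     * keep the first handle that opens; report errors otherwise. */
    #include <dlfcn.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        const char *libs[]  = {"libcuda.so.1", NULL};
        const char *paths[] = {"", "/usr/lib64", NULL};   /* "" = default search paths */
        void *handle = NULL;
        char name[256];

        for (int j = 0; paths[j] != NULL && handle == NULL; j++) {
            for (int i = 0; libs[i] != NULL; i++) {
                if (0 == strcmp("", paths[j])) {
                    snprintf(name, sizeof(name), "%s", libs[i]);
                } else {
                    snprintf(name, sizeof(name), "%s/%s", paths[j], libs[i]);
                }
                handle = dlopen(name, RTLD_NOW | RTLD_GLOBAL);
                if (handle != NULL) {
                    printf("opened %s\n", name);
                    break;
                }
                fprintf(stderr, "open error: %s\n", dlerror());
            }
        }
        return (handle != NULL) ? 0 : 1;   /* failure would disable CUDA-aware support */
    }
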
@@ -489,178 +681,6 @@ static int mca_common_cuda_init(opal_common_cuda_function_table_t *ftable)
     return OMPI_SUCCESS;
 }
 
-/**
- * This function will open and load the symbols needed from the CUDA driver
- * library. Any failure will result in a message and we will return 1.
- * Look for the SONAME of the library which is libcuda.so.1. In most
- * cases, this will result in the library found. However, there are some
- * setups that require the extra steps for searching.
- */
-static int mca_common_cuda_load_libcuda(void)
-{
-    opal_lt_dladvise advise;
-    int retval, i, j;
-    int advise_support = 1;
-    bool loaded = false;
-    char *cudalibs[] = {"libcuda.so.1", NULL};
-    char *searchpaths[] = {"", "/usr/lib64", NULL};
-    char **errmsgs = NULL;
-    char *errmsg = NULL;
-    int errsize;
-
-    if (0 != (retval = opal_lt_dlinit())) {
-        if (OPAL_ERR_NOT_SUPPORTED == retval) {
-            opal_show_help("help-mpi-common-cuda.txt", "dlopen disabled", true);
-        } else {
-            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
-                           "opal_lt_dlinit", retval, opal_lt_dlerror());
-        }
-        return 1;
-    }
-
-    /* Initialize the lt_dladvise structure. If this does not work, we can
-     * proceed without the support. Things should still work. */
-    if (0 != (retval = opal_lt_dladvise_init(&advise))) {
-        if (OPAL_ERR_NOT_SUPPORTED == retval) {
-            advise_support = 0;
-        } else {
-            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
-                           "opal_lt_dladvise_init", retval, opal_lt_dlerror());
-            return 1;
-        }
-    }
-
-    /* Now walk through all the potential names libcuda and find one
-     * that works. If it does, all is good. If not, print out all
-     * the messages about why things failed. This code was careful
-     * to try and save away all error messages if the loading ultimately
-     * failed to help with debugging.
-     * NOTE: On the first loop we just utilize the default loading
-     * paths from the system. For the second loop, set /usr/lib64 to
-     * the search path and try again. This is done to handle the case
-     * where we have both 32 and 64 bit libcuda.so libraries installed.
-     * Even when running in 64-bit mode, the /usr/lib direcotry
-     * is searched first and we may find a 32-bit libcuda.so.1 library.
-     * Loading of this library will fail as libtool does not handle having
-     * the wrong ABI in the search path (unlike ld or ld.so). Note that
-     * we only set this search path after the original search. This is
-     * so that LD_LIBRARY_PATH and run path settings are respected.
-     * Setting this search path overrides them (rather then being appended). */
-    if (advise_support) {
-        if (0 != (retval = opal_lt_dladvise_global(&advise))) {
-            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
-                           "opal_lt_dladvise_global", retval, opal_lt_dlerror());
-            opal_lt_dladvise_destroy(&advise);
-            return 1;
-        }
-        j = 0;
-        while (searchpaths[j] != NULL) {
-            /* Set explicit search path if entry is not empty string */
-            if (strcmp("", searchpaths[j])) {
-                opal_lt_dlsetsearchpath(searchpaths[j]);
-            }
-            i = 0;
-            while (cudalibs[i] != NULL) {
-                const char *str;
-                libcuda_handle = opal_lt_dlopenadvise(cudalibs[i], advise);
-                if (NULL == libcuda_handle) {
-                    str = opal_lt_dlerror();
-                    if (NULL != str) {
-                        opal_argv_append(&errsize, &errmsgs, str);
-                    } else {
-                        opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL.");
-                    }
-                    opal_output_verbose(10, mca_common_cuda_output,
-                                        "CUDA: Library open error: %s",
-                                        errmsgs[errsize-1]);
-                } else {
-                    opal_output_verbose(10, mca_common_cuda_output,
-                                        "CUDA: Library successfully opened %s",
-                                        cudalibs[i]);
-                    loaded = true;
-                    break;
-                }
-                i++;
-            }
-            if (true == loaded) break; /* Break out of outer loop */
-            j++;
-        }
-        opal_lt_dladvise_destroy(&advise);
-    } else {
-        j = 0;
-        /* No lt_dladvise support. This should rarely happen. */
-        while (searchpaths[j] != NULL) {
-            /* Set explicit search path if entry is not empty string */
-            if (strcmp("", searchpaths[j])) {
-                opal_lt_dlsetsearchpath(searchpaths[j]);
-            }
-            i = 0;
-            while (cudalibs[i] != NULL) {
-                const char *str;
-                libcuda_handle = opal_lt_dlopen(cudalibs[i]);
-                if (NULL == libcuda_handle) {
-                    str = opal_lt_dlerror();
-                    if (NULL != str) {
-                        opal_argv_append(&errsize, &errmsgs, str);
-                    } else {
-                        opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL.");
-                    }
-
-                    opal_output_verbose(10, mca_common_cuda_output,
-                                        "CUDA: Library open error: %s",
-                                        errmsgs[errsize-1]);
-
-                } else {
-                    opal_output_verbose(10, mca_common_cuda_output,
-                                        "CUDA: Library successfully opened %s",
-                                        cudalibs[i]);
-                    loaded = true;
-                    break;
-                }
-                i++;
-            }
-            if (true == loaded) break; /* Break out of outer loop */
-            j++;
-        }
-    }
-
-    if (loaded != true) {
-        errmsg = opal_argv_join(errmsgs, '\n');
-        opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true,
-                       errmsg);
-    }
-    opal_argv_free(errmsgs);
-    free(errmsg);
-
-    if (loaded != true) {
-        return 1;
-    }
-
-    /* Map in the functions that we need */
-    OMPI_CUDA_DLSYM(libcuda_handle, cuStreamCreate);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuCtxGetCurrent);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuEventCreate);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuEventRecord);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuMemHostRegister);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuMemHostUnregister);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuPointerGetAttribute);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuEventQuery);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuEventDestroy);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuStreamWaitEvent);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuMemcpyAsync);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuMemcpy);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuMemFree);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuMemAlloc);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuMemGetAddressRange);
-#if OMPI_CUDA_SUPPORT_41
-    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcGetEventHandle);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcOpenEventHandle);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcOpenMemHandle);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcCloseMemHandle);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcGetMemHandle);
-#endif /* OMPI_CUDA_SUPPORT_41 */
-    return 0;
-}
 
 /**
  * Call the CUDA register function so we pin the memory in the CUDA
@@ -669,13 +689,25 @@ static int mca_common_cuda_load_libcuda(void)
 void mca_common_cuda_register(void *ptr, size_t amount, char *msg) {
     int res;
 
+    /* Always first check if the support is enabled. If not, just return */
+    if (!ompi_mpi_cuda_support)
+        return;
+
+    /* Registering memory during BTL initialization will be the first call
+     * into the cuda common code, so this is where we do the first
+     * initialization function. If the first stage fails, then disable
+     * support and return. */
+    if (!stage_one_init_complete) {
+        if (0 != mca_common_cuda_stage_one_init()) {
+            ompi_mpi_cuda_support = 0;
+            return;
+        }
+        opal_cuda_add_initialization_function(&mca_common_cuda_init);
+        OBJ_CONSTRUCT(&common_cuda_memory_registrations, opal_list_t);
+    }
+
     if (!common_cuda_initialized) {
         common_cuda_mem_regs_t *regptr;
-        if (!common_cuda_init_function_added) {
-            opal_cuda_add_initialization_function(&mca_common_cuda_init);
-            OBJ_CONSTRUCT(&common_cuda_memory_registrations, opal_list_t);
-            common_cuda_init_function_added = true;
-        }
         regptr = OBJ_NEW(common_cuda_mem_regs_t);
         regptr->ptr = ptr;
         regptr->amount = amount;
@ -713,7 +745,7 @@ void mca_common_cuda_unregister(void *ptr, char *msg) {
|
|||||||
/* This can happen if memory was queued up to be registered, but
|
/* This can happen if memory was queued up to be registered, but
|
||||||
* no CUDA operations happened, so it never was registered.
|
* no CUDA operations happened, so it never was registered.
|
||||||
* Therefore, just release any of the resources. */
|
* Therefore, just release any of the resources. */
|
||||||
if (false == common_cuda_initialized) {
|
if (!common_cuda_initialized) {
|
||||||
s = opal_list_get_size(&common_cuda_memory_registrations);
|
s = opal_list_get_size(&common_cuda_memory_registrations);
|
||||||
for(i = 0; i < s; i++) {
|
for(i = 0; i < s; i++) {
|
||||||
mem_reg = (common_cuda_mem_regs_t *)
|
mem_reg = (common_cuda_mem_regs_t *)
|
||||||
@@ -1426,6 +1458,14 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
     }
     /* Must be a device pointer */
     assert(memType == CU_MEMORYTYPE_DEVICE);
+
+    /* First access on a device pointer finalizes CUDA support initialization.
+     * If initialization fails, disable support. */
+    if (!stage_three_init_complete) {
+        if (0 != mca_common_cuda_stage_three_init()) {
+            ompi_mpi_cuda_support = 0;
+        }
+    }
     return 1;
 }
 
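
Taken together, the hunks above implement a guard-flag pattern: the first memory-registration request runs stage one (loading libcuda.so.1), and the first buffer identified as GPU memory runs stage three to finish the CUDA setup, with failure at either point disabling CUDA-aware support. A schematic, self-contained sketch of that control flow is below; stage_one, stage_three and the surrounding names are stand-ins, not the real functions.

    /* Schematic of the lazy two-trigger initialization; not the real code. */
    #include <stdbool.h>
    #include <stdio.h>

    static bool stage_one_done = false;
    static bool stage_three_done = false;
    static bool support_enabled = true;

    static int stage_one(void)   { puts("stage one: load driver library");   return 0; }
    static int stage_three(void) { puts("stage three: finish CUDA setup");   return 0; }

    /* Trigger 1: first memory registration request from a BTL. */
    static void register_memory(void)
    {
        if (!support_enabled) return;
        if (!stage_one_done) {
            stage_one_done = true;                 /* run at most once */
            if (0 != stage_one()) support_enabled = false;
        }
    }

    /* Trigger 2: first buffer that turns out to live on the GPU. */
    static void saw_gpu_buffer(void)
    {
        if (!support_enabled) return;
        if (!stage_three_done) {
            stage_three_done = true;
            if (0 != stage_three()) support_enabled = false;
        }
    }

    int main(void)
    {
        register_memory();   /* happens during BTL initialization */
        register_memory();   /* later calls are cheap no-ops */
        saw_gpu_buffer();    /* happens on the first GPU pointer */
        return 0;
    }
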
@@ -157,10 +157,11 @@ MPI developers.
 #
 [dlopen failed]
 The library attempted to open the following supporting CUDA libraries,
-but each of them failed.
+but each of them failed. CUDA-aware support is disabled.
 %s
 #
 [dlsym failed]
 An error occurred while trying to map in the address of a function.
 Function Name: %s
 Error string: %s
+CUDA-aware support is disabled.