
Refactor some of the initialization code.

This commit was SVN r29009.
This commit is contained in:
Rolf vandeVaart 2013-08-09 14:54:17 +00:00
parent f7391eca23
commit cd72024a3c
2 changed files with 256 additions and 215 deletions
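
In outline, the refactoring below replaces the single lazy-initialization path with two guarded stages: mca_common_cuda_stage_one_init() runs on the first mca_common_cuda_register() call from a BTL and only loads libcuda.so.1 and resolves its symbols, while mca_common_cuda_stage_three_init() runs once the first GPU buffer is seen and performs the context-dependent setup. The following is a minimal sketch of that control flow, not code from the diff; the stub names (stage_one_init, stage_three_init, register_memory, is_gpu_buffer, cuda_support) are simplifications of the real identifiers and the helper bodies are stubbed out.

    #include <stdbool.h>
    #include <stddef.h>

    /* Stand-ins for the real guard flags in the common CUDA code. */
    static bool stage_one_init_complete = false;
    static bool stage_three_init_complete = false;
    static int  cuda_support = 1;

    /* Hypothetical stubs: the real functions dlopen libcuda.so.1 (stage one)
     * and perform CUDA-context dependent setup (stage three). */
    static int stage_one_init(void)   { stage_one_init_complete = true;   return 0; }
    static int stage_three_init(void) { stage_three_init_complete = true; return 0; }

    /* Called when a BTL registers memory: trigger stage one lazily. */
    void register_memory(void *ptr, size_t amount)
    {
        (void)ptr; (void)amount;
        if (!cuda_support)
            return;
        if (!stage_one_init_complete && 0 != stage_one_init()) {
            cuda_support = 0;              /* loading the driver library failed */
            return;
        }
        /* ...queue the registration until the rest of CUDA support is up... */
    }

    /* Called when a buffer turns out to be GPU memory: trigger stage three. */
    int is_gpu_buffer(const void *buf)
    {
        (void)buf;
        if (!stage_three_init_complete && 0 != stage_three_init()) {
            cuda_support = 0;              /* context-dependent setup failed */
        }
        return 1;
    }

The point of the split, per the comments in the diff, is that loading the driver library must not assume the application has created a CUDA context yet; the context-dependent work is deferred until a real device pointer shows up.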


@@ -88,12 +88,16 @@ struct cudaFunctionTable {
     int (*cuIpcCloseMemHandle)(CUdeviceptr);
     int (*cuIpcGetMemHandle)(CUipcMemHandle*, CUdeviceptr);
 #endif /* OMPI_CUDA_SUPPORT_41 */
+    int (*cuCtxGetDevice)(CUdevice *);
+    int (*cuDeviceCanAccessPeer)(int *, CUdevice, CUdevice);
+    int (*cuDeviceGet)(CUdevice *, int);
 } cudaFunctionTable;
 typedef struct cudaFunctionTable cudaFunctionTable_t;
 cudaFunctionTable_t cuFunc;

+static bool stage_one_init_complete = false;
+static bool stage_three_init_complete = false;
 static bool common_cuda_initialized = false;
-static bool common_cuda_init_function_added = false;
 static int mca_common_cuda_verbose;
 static int mca_common_cuda_output = 0;
 static bool mca_common_cuda_enabled = false;
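
The hunk above extends cudaFunctionTable, the struct of function pointers that the code fills in from the driver library at run time. As background, here is a small self-contained sketch of that pattern using plain dlopen()/dlsym() from dlfcn.h rather than the opal_lt_* wrappers used later in this diff; the one-entry table and the choice of cuDriverGetVersion are illustrative assumptions, not taken from the commit.

    #include <dlfcn.h>
    #include <stdio.h>

    /* Illustrative one-entry function table; the real cudaFunctionTable has
     * one member per CUDA driver entry point the code needs. */
    struct fn_table {
        int (*driverGetVersion)(int *);
    };
    static struct fn_table tbl;

    static int load_driver(void)
    {
        void *handle = dlopen("libcuda.so.1", RTLD_NOW | RTLD_GLOBAL);
        if (NULL == handle) {
            fprintf(stderr, "dlopen failed: %s\n", dlerror());
            return 1;
        }
        /* The real code wraps this lookup in the OMPI_CUDA_DLSYM macro and
         * repeats it for every table member. */
        tbl.driverGetVersion = (int (*)(int *)) dlsym(handle, "cuDriverGetVersion");
        if (NULL == tbl.driverGetVersion) {
            fprintf(stderr, "dlsym failed: %s\n", dlerror());
            return 1;
        }
        return 0;
    }

    int main(void)
    {
        int version = 0;
        if (0 != load_driver())
            return 1;
        if (0 == tbl.driverGetVersion(&version))   /* 0 is CUDA_SUCCESS */
            printf("CUDA driver version: %d\n", version);
        return 0;
    }

On older glibc this needs -ldl at link time, and the dlopen() will of course only succeed on a machine where the NVIDIA driver is installed.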
@@ -120,10 +124,10 @@ struct common_cuda_mem_regs_t {
 };
 typedef struct common_cuda_mem_regs_t common_cuda_mem_regs_t;
 OBJ_CLASS_DECLARATION(common_cuda_mem_regs_t);
-OBJ_CLASS_INSTANCE( common_cuda_mem_regs_t,
-                    opal_list_item_t,
-                    NULL,
-                    NULL );
+OBJ_CLASS_INSTANCE(common_cuda_mem_regs_t,
+                   opal_list_item_t,
+                   NULL,
+                   NULL);

 #if OMPI_CUDA_SUPPORT_41
 static int mca_common_cuda_async = 1;
@@ -167,7 +171,6 @@ static double accum;
 static float mydifftime(struct timespec ts_start, struct timespec ts_end);
 #endif /* CUDA_COMMON_TIMING */

-static int mca_common_cuda_load_libcuda(void);
 /* These functions are typically unused in the optimized builds. */
 static void cuda_dump_evthandle(int, void *, char *) __opal_attribute_unused__ ;
 static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ;
@@ -181,15 +184,54 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ;
 #endif /* OMPI_CUDA_SUPPORT_41 */

-int mca_common_cuda_register_mca_variables(void)
-{
-    static bool registered = false;
-
-    if (registered) {
-        return OMPI_SUCCESS;
+/**
+ * This function is registered with the OPAL CUDA support. In that way,
+ * these function pointers will be loaded into the OPAL CUDA code when
+ * the first convertor is initialized. This does not trigger any CUDA
+ * specific initialization, as this may just be a host buffer that is
+ * triggering this call.
+ */
+static int mca_common_cuda_init(opal_common_cuda_function_table_t *ftable)
+{
+    if (OPAL_UNLIKELY(!ompi_mpi_cuda_support)) {
+        return OMPI_ERROR;
     }
-    registered = true;
+
+    ftable->gpu_is_gpu_buffer = &mca_common_cuda_is_gpu_buffer;
+    ftable->gpu_cu_memcpy_async = &mca_common_cuda_cu_memcpy_async;
+    ftable->gpu_cu_memcpy = &mca_common_cuda_cu_memcpy;
+    ftable->gpu_memmove = &mca_common_cuda_memmove;
+
+    opal_output_verbose(30, mca_common_cuda_output,
+                        "CUDA: support functions initialized");
+    return OMPI_SUCCESS;
+}
+
+/**
+ * This is the first stage of initialization. This function is
+ * triggered when there are memory registration requests from various
+ * BTLs. This function will register some mca variables and then open
+ * and load the symbols needed from the CUDA driver library. Look for
+ * the SONAME of the library, which is libcuda.so.1. In most cases,
+ * this will result in the library being found. However, there are some
+ * setups that require extra steps for searching. Any failure will
+ * result in this initialization failing, and the status will be set
+ * to show that.
+ */
+static int mca_common_cuda_stage_one_init(void)
+{
+    opal_lt_dladvise advise;
+    int retval, i, j;
+    int advise_support = 1;
+    char *cudalibs[] = {"libcuda.so.1", NULL};
+    char *searchpaths[] = {"", "/usr/lib64", NULL};
+    char **errmsgs = NULL;
+    char *errmsg = NULL;
+    int errsize;
+    bool stage_one_init_passed = false;
+
+    stage_one_init_complete = true;

     /* Set different levels of verbosity in the cuda related code. */
     mca_common_cuda_verbose = 0;
@@ -241,48 +283,198 @@ int mca_common_cuda_register_mca_variables(void)
                                  &cuda_event_max);
 #endif /* OMPI_CUDA_SUPPORT_41 */

-    return OMPI_SUCCESS;
+    mca_common_cuda_output = opal_output_open(NULL);
+    opal_output_set_verbosity(mca_common_cuda_output, mca_common_cuda_verbose);
+
+    if (0 != (retval = opal_lt_dlinit())) {
+        if (OPAL_ERR_NOT_SUPPORTED == retval) {
+            opal_show_help("help-mpi-common-cuda.txt", "dlopen disabled", true);
+        } else {
+            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
+                           "opal_lt_dlinit", retval, opal_lt_dlerror());
+        }
+        return 1;
+    }
+
+    /* Initialize the lt_dladvise structure. If this does not work, we can
+     * proceed without the support. Things should still work. */
+    if (0 != (retval = opal_lt_dladvise_init(&advise))) {
+        if (OPAL_ERR_NOT_SUPPORTED == retval) {
+            advise_support = 0;
+        } else {
+            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
+                           "opal_lt_dladvise_init", retval, opal_lt_dlerror());
+            return 1;
+        }
+    }
+
+    /* Now walk through all the potential names of libcuda and find one
+     * that works. If it does, all is good. If not, print out all
+     * the messages about why things failed. This code was careful
+     * to try and save away all error messages if the loading ultimately
+     * failed, to help with debugging.
+     * NOTE: On the first loop we just utilize the default loading
+     * paths from the system. For the second loop, set /usr/lib64 as
+     * the search path and try again. This is done to handle the case
+     * where we have both 32 and 64 bit libcuda.so libraries installed.
+     * Even when running in 64-bit mode, the /usr/lib directory
+     * is searched first and we may find a 32-bit libcuda.so.1 library.
+     * Loading of this library will fail as libtool does not handle having
+     * the wrong ABI in the search path (unlike ld or ld.so). Note that
+     * we only set this search path after the original search. This is
+     * so that LD_LIBRARY_PATH and run path settings are respected.
+     * Setting this search path overrides them (rather than being appended). */
+    if (advise_support) {
+        if (0 != (retval = opal_lt_dladvise_global(&advise))) {
+            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
+                           "opal_lt_dladvise_global", retval, opal_lt_dlerror());
+            opal_lt_dladvise_destroy(&advise);
+            return 1;
+        }
+        j = 0;
+        while (searchpaths[j] != NULL) {
+            /* Set explicit search path if entry is not empty string */
+            if (strcmp("", searchpaths[j])) {
+                opal_lt_dlsetsearchpath(searchpaths[j]);
+            }
+            i = 0;
+            while (cudalibs[i] != NULL) {
+                const char *str;
+                libcuda_handle = opal_lt_dlopenadvise(cudalibs[i], advise);
+                if (NULL == libcuda_handle) {
+                    str = opal_lt_dlerror();
+                    if (NULL != str) {
+                        opal_argv_append(&errsize, &errmsgs, str);
+                    } else {
+                        opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL.");
+                    }
+                    opal_output_verbose(10, mca_common_cuda_output,
+                                        "CUDA: Library open error: %s",
+                                        errmsgs[errsize-1]);
+                } else {
+                    opal_output_verbose(10, mca_common_cuda_output,
+                                        "CUDA: Library successfully opened %s",
+                                        cudalibs[i]);
+                    stage_one_init_passed = true;
+                    break;
+                }
+                i++;
+            }
+            if (true == stage_one_init_passed) break; /* Break out of outer loop */
+            j++;
+        }
+        opal_lt_dladvise_destroy(&advise);
+    } else {
+        j = 0;
+        /* No lt_dladvise support. This should rarely happen. */
+        while (searchpaths[j] != NULL) {
+            /* Set explicit search path if entry is not empty string */
+            if (strcmp("", searchpaths[j])) {
+                opal_lt_dlsetsearchpath(searchpaths[j]);
+            }
+            i = 0;
+            while (cudalibs[i] != NULL) {
+                const char *str;
+                libcuda_handle = opal_lt_dlopen(cudalibs[i]);
+                if (NULL == libcuda_handle) {
+                    str = opal_lt_dlerror();
+                    if (NULL != str) {
+                        opal_argv_append(&errsize, &errmsgs, str);
+                    } else {
+                        opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL.");
+                    }
+                    opal_output_verbose(10, mca_common_cuda_output,
+                                        "CUDA: Library open error: %s",
+                                        errmsgs[errsize-1]);
+                } else {
+                    opal_output_verbose(10, mca_common_cuda_output,
+                                        "CUDA: Library successfully opened %s",
+                                        cudalibs[i]);
+                    stage_one_init_passed = true;
+                    break;
+                }
+                i++;
+            }
+            if (true == stage_one_init_passed) break; /* Break out of outer loop */
+            j++;
+        }
+    }
+
+    if (true != stage_one_init_passed) {
+        errmsg = opal_argv_join(errmsgs, '\n');
+        opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true,
+                       errmsg);
+    }
+    opal_argv_free(errmsgs);
+    free(errmsg);
+    if (true != stage_one_init_passed) {
+        return 1;
+    }
+
+    /* Map in the functions that we need. Note that if there is an error,
+     * the macro OMPI_CUDA_DLSYM will print an error and call return. */
+    OMPI_CUDA_DLSYM(libcuda_handle, cuStreamCreate);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuCtxGetCurrent);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuEventCreate);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuEventRecord);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuMemHostRegister);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuMemHostUnregister);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuPointerGetAttribute);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuEventQuery);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuEventDestroy);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuStreamWaitEvent);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuMemcpyAsync);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuMemcpy);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuMemFree);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuMemAlloc);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuMemGetAddressRange);
+#if OMPI_CUDA_SUPPORT_41
+    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcGetEventHandle);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcOpenEventHandle);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcOpenMemHandle);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcCloseMemHandle);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcGetMemHandle);
+#endif /* OMPI_CUDA_SUPPORT_41 */
+    OMPI_CUDA_DLSYM(libcuda_handle, cuCtxGetDevice);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuDeviceCanAccessPeer);
+    OMPI_CUDA_DLSYM(libcuda_handle, cuDeviceGet);
+    return 0;
 }

 /**
- * This function is registered with the OPAL CUDA support. In that way,
- * we will complete initialization when OPAL detects the first GPU memory
- * access. In the case that no GPU memory access happens, then this function
- * never gets called.
+ * This is the last phase of initialization. This is triggered when we examine
+ * a buffer pointer and determine it is a GPU buffer. We then assume the user
+ * has selected their GPU and we can go ahead with all the CUDA related
+ * initializations.
  */
-static int mca_common_cuda_init(opal_common_cuda_function_table_t *ftable)
+static int mca_common_cuda_stage_three_init(void)
 {
     int i, s;
     CUresult res;
     CUcontext cuContext;
     common_cuda_mem_regs_t *mem_reg;

+    stage_three_init_complete = true;
+
+    opal_output_verbose(20, mca_common_cuda_output,
+                        "CUDA: entering stage three init");
+
     if (OPAL_UNLIKELY(!ompi_mpi_cuda_support)) {
+        opal_output_verbose(20, mca_common_cuda_output,
+                            "CUDA: No mpi cuda support, exiting stage three init");
         return OMPI_ERROR;
     }

     if (OPAL_LIKELY(common_cuda_initialized)) {
+        opal_output_verbose(20, mca_common_cuda_output,
+                            "CUDA: Stage three already complete, exiting stage three init");
         return OMPI_SUCCESS;
     }

-    /* Make sure this component's variables are registered */
-    mca_common_cuda_register_mca_variables();
-
-    ftable->gpu_is_gpu_buffer = &mca_common_cuda_is_gpu_buffer;
-    ftable->gpu_cu_memcpy_async = &mca_common_cuda_cu_memcpy_async;
-    ftable->gpu_cu_memcpy = &mca_common_cuda_cu_memcpy;
-    ftable->gpu_memmove = &mca_common_cuda_memmove;
-
-    mca_common_cuda_output = opal_output_open(NULL);
-    opal_output_set_verbosity(mca_common_cuda_output, mca_common_cuda_verbose);
-
-    /* If we cannot load the library, then disable support */
-    if (0 != mca_common_cuda_load_libcuda()) {
-        common_cuda_initialized = true;
-        ompi_mpi_cuda_support = 0;
-        return OMPI_ERROR;
-    }
-
     /* Check to see if this process is running in a CUDA context. If
      * so, all is good. If not, then disable registration of memory. */
     res = cuFunc.cuCtxGetCurrent(&cuContext);
@@ -489,178 +681,6 @@ static int mca_common_cuda_init(opal_common_cuda_function_table_t *ftable)
     return OMPI_SUCCESS;
 }

-/**
- * This function will open and load the symbols needed from the CUDA driver
- * library. Any failure will result in a message and we will return 1.
- * Look for the SONAME of the library, which is libcuda.so.1. In most
- * cases, this will result in the library being found. However, there are
- * some setups that require extra steps for searching.
- */
-static int mca_common_cuda_load_libcuda(void)
-{
-    opal_lt_dladvise advise;
-    int retval, i, j;
-    int advise_support = 1;
-    bool loaded = false;
-    char *cudalibs[] = {"libcuda.so.1", NULL};
-    char *searchpaths[] = {"", "/usr/lib64", NULL};
-    char **errmsgs = NULL;
-    char *errmsg = NULL;
-    int errsize;
-
-    if (0 != (retval = opal_lt_dlinit())) {
-        if (OPAL_ERR_NOT_SUPPORTED == retval) {
-            opal_show_help("help-mpi-common-cuda.txt", "dlopen disabled", true);
-        } else {
-            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
-                           "opal_lt_dlinit", retval, opal_lt_dlerror());
-        }
-        return 1;
-    }
-
-    /* Initialize the lt_dladvise structure. If this does not work, we can
-     * proceed without the support. Things should still work. */
-    if (0 != (retval = opal_lt_dladvise_init(&advise))) {
-        if (OPAL_ERR_NOT_SUPPORTED == retval) {
-            advise_support = 0;
-        } else {
-            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
-                           "opal_lt_dladvise_init", retval, opal_lt_dlerror());
-            return 1;
-        }
-    }
-
-    /* Now walk through all the potential names of libcuda and find one
-     * that works. If it does, all is good. If not, print out all
-     * the messages about why things failed. This code was careful
-     * to try and save away all error messages if the loading ultimately
-     * failed, to help with debugging.
-     * NOTE: On the first loop we just utilize the default loading
-     * paths from the system. For the second loop, set /usr/lib64 as
-     * the search path and try again. This is done to handle the case
-     * where we have both 32 and 64 bit libcuda.so libraries installed.
-     * Even when running in 64-bit mode, the /usr/lib directory
-     * is searched first and we may find a 32-bit libcuda.so.1 library.
-     * Loading of this library will fail as libtool does not handle having
-     * the wrong ABI in the search path (unlike ld or ld.so). Note that
-     * we only set this search path after the original search. This is
-     * so that LD_LIBRARY_PATH and run path settings are respected.
-     * Setting this search path overrides them (rather than being appended). */
-    if (advise_support) {
-        if (0 != (retval = opal_lt_dladvise_global(&advise))) {
-            opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
-                           "opal_lt_dladvise_global", retval, opal_lt_dlerror());
-            opal_lt_dladvise_destroy(&advise);
-            return 1;
-        }
-        j = 0;
-        while (searchpaths[j] != NULL) {
-            /* Set explicit search path if entry is not empty string */
-            if (strcmp("", searchpaths[j])) {
-                opal_lt_dlsetsearchpath(searchpaths[j]);
-            }
-            i = 0;
-            while (cudalibs[i] != NULL) {
-                const char *str;
-                libcuda_handle = opal_lt_dlopenadvise(cudalibs[i], advise);
-                if (NULL == libcuda_handle) {
-                    str = opal_lt_dlerror();
-                    if (NULL != str) {
-                        opal_argv_append(&errsize, &errmsgs, str);
-                    } else {
-                        opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL.");
-                    }
-                    opal_output_verbose(10, mca_common_cuda_output,
-                                        "CUDA: Library open error: %s",
-                                        errmsgs[errsize-1]);
-                } else {
-                    opal_output_verbose(10, mca_common_cuda_output,
-                                        "CUDA: Library successfully opened %s",
-                                        cudalibs[i]);
-                    loaded = true;
-                    break;
-                }
-                i++;
-            }
-            if (true == loaded) break; /* Break out of outer loop */
-            j++;
-        }
-        opal_lt_dladvise_destroy(&advise);
-    } else {
-        j = 0;
-        /* No lt_dladvise support. This should rarely happen. */
-        while (searchpaths[j] != NULL) {
-            /* Set explicit search path if entry is not empty string */
-            if (strcmp("", searchpaths[j])) {
-                opal_lt_dlsetsearchpath(searchpaths[j]);
-            }
-            i = 0;
-            while (cudalibs[i] != NULL) {
-                const char *str;
-                libcuda_handle = opal_lt_dlopen(cudalibs[i]);
-                if (NULL == libcuda_handle) {
-                    str = opal_lt_dlerror();
-                    if (NULL != str) {
-                        opal_argv_append(&errsize, &errmsgs, str);
-                    } else {
-                        opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL.");
-                    }
-                    opal_output_verbose(10, mca_common_cuda_output,
-                                        "CUDA: Library open error: %s",
-                                        errmsgs[errsize-1]);
-                } else {
-                    opal_output_verbose(10, mca_common_cuda_output,
-                                        "CUDA: Library successfully opened %s",
-                                        cudalibs[i]);
-                    loaded = true;
-                    break;
-                }
-                i++;
-            }
-            if (true == loaded) break; /* Break out of outer loop */
-            j++;
-        }
-    }
-
-    if (loaded != true) {
-        errmsg = opal_argv_join(errmsgs, '\n');
-        opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true,
-                       errmsg);
-    }
-    opal_argv_free(errmsgs);
-    free(errmsg);
-    if (loaded != true) {
-        return 1;
-    }
-
-    /* Map in the functions that we need */
-    OMPI_CUDA_DLSYM(libcuda_handle, cuStreamCreate);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuCtxGetCurrent);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuEventCreate);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuEventRecord);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuMemHostRegister);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuMemHostUnregister);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuPointerGetAttribute);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuEventQuery);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuEventDestroy);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuStreamWaitEvent);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuMemcpyAsync);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuMemcpy);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuMemFree);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuMemAlloc);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuMemGetAddressRange);
-#if OMPI_CUDA_SUPPORT_41
-    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcGetEventHandle);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcOpenEventHandle);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcOpenMemHandle);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcCloseMemHandle);
-    OMPI_CUDA_DLSYM(libcuda_handle, cuIpcGetMemHandle);
-#endif /* OMPI_CUDA_SUPPORT_41 */
-
-    return 0;
-}

 /**
  * Call the CUDA register function so we pin the memory in the CUDA
@@ -669,13 +689,25 @@ static int mca_common_cuda_load_libcuda(void)
 void mca_common_cuda_register(void *ptr, size_t amount, char *msg) {
     int res;

+    /* Always first check if the support is enabled. If not, just return. */
+    if (!ompi_mpi_cuda_support)
+        return;
+
+    /* Registering memory during BTL initialization will be the first call
+     * into the cuda common code, so this is where we do the first stage of
+     * initialization. If the first stage fails, then disable support and
+     * return. */
+    if (!stage_one_init_complete) {
+        if (0 != mca_common_cuda_stage_one_init()) {
+            ompi_mpi_cuda_support = 0;
+            return;
+        }
+        opal_cuda_add_initialization_function(&mca_common_cuda_init);
+        OBJ_CONSTRUCT(&common_cuda_memory_registrations, opal_list_t);
+    }
+
     if (!common_cuda_initialized) {
         common_cuda_mem_regs_t *regptr;
-        if (!common_cuda_init_function_added) {
-            opal_cuda_add_initialization_function(&mca_common_cuda_init);
-            OBJ_CONSTRUCT(&common_cuda_memory_registrations, opal_list_t);
-            common_cuda_init_function_added = true;
-        }
         regptr = OBJ_NEW(common_cuda_mem_regs_t);
         regptr->ptr = ptr;
         regptr->amount = amount;
@@ -713,7 +745,7 @@ void mca_common_cuda_unregister(void *ptr, char *msg) {
     /* This can happen if memory was queued up to be registered, but
      * no CUDA operations happened, so it never was registered.
      * Therefore, just release any of the resources. */
-    if (false == common_cuda_initialized) {
+    if (!common_cuda_initialized) {
         s = opal_list_get_size(&common_cuda_memory_registrations);
         for(i = 0; i < s; i++) {
             mem_reg = (common_cuda_mem_regs_t *)
@@ -1426,6 +1458,14 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
     }
     /* Must be a device pointer */
     assert(memType == CU_MEMORYTYPE_DEVICE);
+
+    /* First access on a device pointer finalizes CUDA support initialization.
+     * If initialization fails, disable support. */
+    if (!stage_three_init_complete) {
+        if (0 != mca_common_cuda_stage_three_init()) {
+            ompi_mpi_cuda_support = 0;
+        }
+    }
+
     return 1;
 }
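
The final hunk of this file hooks stage-three initialization into the GPU-buffer check. That check rests on the driver's cuPointerGetAttribute() call (one of the symbols resolved into cuFunc above); below is a minimal stand-alone sketch of the same classification. It links directly against the CUDA driver API (cuda.h, -lcuda) instead of going through the dlopen'd function table, assumes a CUDA context can be created, and omits error checking in main() for brevity.

    #include <cuda.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Return 1 if buf is device memory, 0 if host memory or unknown to CUDA. */
    static int is_gpu_buffer(const void *buf)
    {
        CUmemorytype mem_type = 0;
        CUresult res = cuPointerGetAttribute(&mem_type,
                                             CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
                                             (CUdeviceptr)(uintptr_t)buf);
        if (CUDA_SUCCESS != res) {
            /* Plain host memory is not tracked by the driver and fails here. */
            return 0;
        }
        return (CU_MEMORYTYPE_DEVICE == mem_type) ? 1 : 0;
    }

    int main(void)
    {
        CUdevice dev;
        CUcontext ctx;
        CUdeviceptr dbuf;
        int host_int = 0;

        cuInit(0);
        cuDeviceGet(&dev, 0);
        cuCtxCreate(&ctx, 0, dev);
        cuMemAlloc(&dbuf, 16);

        printf("host pointer:   %d\n", is_gpu_buffer(&host_int));
        printf("device pointer: %d\n", is_gpu_buffer((void *)(uintptr_t)dbuf));

        cuMemFree(dbuf);
        cuCtxDestroy(ctx);
        return 0;
    }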


@@ -157,10 +157,11 @@ MPI developers.
 #
 [dlopen failed]
 The library attempted to open the following supporting CUDA libraries,
-but each of them failed.
+but each of them failed. CUDA-aware support is disabled.
 %s
 #
 [dlsym failed]
 An error occurred while trying to map in the address of a function.
 Function Name: %s
 Error string: %s
+CUDA-aware support is disabled.