Add code to detect CUDA managed memory, along with the configure support for that check.
This commit was SVN r32801.
Этот коммит содержится в:
родитель
35858f837a
Коммит
399dc3db43
@ -93,6 +93,13 @@ AC_COMPILE_IFELSE(
|
|||||||
[CUDA_VERSION_60_OR_GREATER=1],
|
[CUDA_VERSION_60_OR_GREATER=1],
|
||||||
[CUDA_VERSION_60_OR_GREATER=0])
|
[CUDA_VERSION_60_OR_GREATER=0])
|
||||||
|
|
||||||
|
# If we have CUDA support, check to see if we have support for cuPointerGetAttributes
|
||||||
|
# which was first introduced in CUDA 7.0.
|
||||||
|
# The spaces around "=" are required: without them test(1) sees one non-empty
|
||||||
|
# word and always succeeds, so the check would run even without CUDA support.
|
||||||
|
AS_IF([test "$opal_check_cuda_happy" = "yes"],
|
||||||
|
[AC_CHECK_DECL([cuPointerGetAttributes], [CUDA_GET_ATTRIBUTES=1], [CUDA_GET_ATTRIBUTES=0],
|
||||||
|
[#include <$opal_cuda_incdir/cuda.h>])],
|
||||||
|
[])
|
||||||
|
|
||||||
AC_MSG_CHECKING([if have cuda support])
|
AC_MSG_CHECKING([if have cuda support])
|
||||||
if test "$opal_check_cuda_happy" = "yes"; then
|
if test "$opal_check_cuda_happy" = "yes"; then
|
||||||
AC_MSG_RESULT([yes (-I$with_cuda)])
|
AC_MSG_RESULT([yes (-I$with_cuda)])
|
||||||
@ -116,6 +123,10 @@ AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
|
|||||||
AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
|
AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
|
||||||
[Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])
|
[Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])
|
||||||
|
|
||||||
|
AM_CONDITIONAL([OPAL_cuda_get_attributes], [test "x$CUDA_GET_ATTRIBUTES" = "x1"])
|
||||||
|
AC_DEFINE_UNQUOTED([OPAL_CUDA_GET_ATTRIBUTES],$CUDA_GET_ATTRIBUTES,
|
||||||
|
[Whether we have CUDA cuPointerGetAttributes function available])
|
||||||
|
|
||||||
# There is nothing specific we can check for to see if GPU Direct RDMA is available.
|
# There is nothing specific we can check for to see if GPU Direct RDMA is available.
|
||||||
# Therefore, we check to see whether we have CUDA 6.0 or later.
|
# Therefore, we check to see whether we have CUDA 6.0 or later.
|
||||||
AM_CONDITIONAL([OPAL_cuda_gdr_support], [test "x$CUDA_VERSION_60_OR_GREATER" = "x1"])
|
AM_CONDITIONAL([OPAL_cuda_gdr_support], [test "x$CUDA_VERSION_60_OR_GREATER" = "x1"])
|
||||||
|
@ -100,6 +100,9 @@ struct cudaFunctionTable {
|
|||||||
int (*cuEventSynchronize)(CUevent);
|
int (*cuEventSynchronize)(CUevent);
|
||||||
int (*cuStreamSynchronize)(CUstream);
|
int (*cuStreamSynchronize)(CUstream);
|
||||||
int (*cuStreamDestroy)(CUstream);
|
int (*cuStreamDestroy)(CUstream);
|
||||||
|
#if OPAL_CUDA_GET_ATTRIBUTES
|
||||||
|
int (*cuPointerGetAttributes)(unsigned int, CUpointer_attribute *, void **, CUdeviceptr);
|
||||||
|
#endif /* OPAL_CUDA_GET_ATTRIBUTES */
|
||||||
} cudaFunctionTable;
|
} cudaFunctionTable;
|
||||||
typedef struct cudaFunctionTable cudaFunctionTable_t;
|
typedef struct cudaFunctionTable cudaFunctionTable_t;
|
||||||
cudaFunctionTable_t cuFunc;
|
cudaFunctionTable_t cuFunc;
|
||||||
@ -494,6 +497,9 @@ int mca_common_cuda_stage_one_init(void)
|
|||||||
OPAL_CUDA_DLSYM(libcuda_handle, cuEventSynchronize);
|
OPAL_CUDA_DLSYM(libcuda_handle, cuEventSynchronize);
|
||||||
OPAL_CUDA_DLSYM(libcuda_handle, cuStreamSynchronize);
|
OPAL_CUDA_DLSYM(libcuda_handle, cuStreamSynchronize);
|
||||||
OPAL_CUDA_DLSYM(libcuda_handle, cuStreamDestroy);
|
OPAL_CUDA_DLSYM(libcuda_handle, cuStreamDestroy);
|
||||||
|
#if OPAL_CUDA_GET_ATTRIBUTES
|
||||||
|
OPAL_CUDA_DLSYM(libcuda_handle, cuPointerGetAttributes);
|
||||||
|
#endif /* OPAL_CUDA_GET_ATTRIBUTES */
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1715,10 +1721,32 @@ static float mydifftime(struct timespec ts_start, struct timespec ts_end) {
|
|||||||
static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
|
static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
|
||||||
{
|
{
|
||||||
int res;
|
int res;
|
||||||
CUmemorytype memType;
|
CUmemorytype memType = 0;
|
||||||
CUdeviceptr dbuf = (CUdeviceptr)pUserBuf;
|
CUdeviceptr dbuf = (CUdeviceptr)pUserBuf;
|
||||||
CUcontext ctx = NULL;
|
CUcontext ctx = NULL;
|
||||||
|
#if OPAL_CUDA_GET_ATTRIBUTES
|
||||||
|
uint32_t isManaged = 0;
|
||||||
|
/* With CUDA 7.0, we can get multiple attributes with a single call */
|
||||||
|
CUpointer_attribute attributes[3] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
|
||||||
|
CU_POINTER_ATTRIBUTE_CONTEXT,
|
||||||
|
CU_POINTER_ATTRIBUTE_IS_MANAGED};
|
||||||
|
void *attrdata[] = {(void *)&memType, (void *)&ctx, (void *)&isManaged};
|
||||||
|
|
||||||
|
res = cuFunc.cuPointerGetAttributes(3, attributes, attrdata, dbuf);
|
||||||
|
if (res != CUDA_SUCCESS) {
|
||||||
|
/* If we cannot determine whether it is a device pointer,
|
||||||
|
* just assume it is not. */
|
||||||
|
return 0;
|
||||||
|
} else if (memType == CU_MEMORYTYPE_HOST) {
|
||||||
|
/* Host memory, nothing to do here */
|
||||||
|
return 0;
|
||||||
|
} else if (memType == 0) {
|
||||||
|
/* This can happen when CUDA is initialized but dbuf is not a valid CUDA pointer */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
/* Must be a device pointer */
|
||||||
|
assert(memType == CU_MEMORYTYPE_DEVICE);
|
||||||
|
#else /* OPAL_CUDA_GET_ATTRIBUTES */
|
||||||
res = cuFunc.cuPointerGetAttribute(&memType,
|
res = cuFunc.cuPointerGetAttribute(&memType,
|
||||||
CU_POINTER_ATTRIBUTE_MEMORY_TYPE, dbuf);
|
CU_POINTER_ATTRIBUTE_MEMORY_TYPE, dbuf);
|
||||||
if (res != CUDA_SUCCESS) {
|
if (res != CUDA_SUCCESS) {
|
||||||
@ -1741,6 +1769,7 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
|
|||||||
* and set the current context to that. It is rare that we will not
|
* and set the current context to that. It is rare that we will not
|
||||||
* have a context. */
|
* have a context. */
|
||||||
res = cuFunc.cuCtxGetCurrent(&ctx);
|
res = cuFunc.cuCtxGetCurrent(&ctx);
|
||||||
|
#endif /* OPAL_CUDA_GET_ATTRIBUTES */
|
||||||
if (OPAL_UNLIKELY(NULL == ctx)) {
|
if (OPAL_UNLIKELY(NULL == ctx)) {
|
||||||
if (CUDA_SUCCESS == res) {
|
if (CUDA_SUCCESS == res) {
|
||||||
res = cuFunc.cuPointerGetAttribute(&ctx,
|
res = cuFunc.cuPointerGetAttribute(&ctx,
|
||||||
@ -1768,6 +1797,15 @@ static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if OPAL_CUDA_GET_ATTRIBUTES
|
||||||
|
if (1 == isManaged) {
|
||||||
|
/* Currently cannot support managed memory */
|
||||||
|
opal_output(0, "CUDA: ptr=%p: CUDA-aware Open MPI detected managed memory but there "
|
||||||
|
"is no support for it. Result will be unpredictable.", pUserBuf);
|
||||||
|
return OPAL_ERROR;
|
||||||
|
}
|
||||||
|
#endif /* OPAL_CUDA_GET_ATTRIBUTES */
|
||||||
|
|
||||||
/* First access on a device pointer finalizes CUDA support initialization.
|
/* First access on a device pointer finalizes CUDA support initialization.
|
||||||
* If initialization fails, disable support. */
|
* If initialization fails, disable support. */
|
||||||
if (!stage_three_init_complete) {
|
if (!stage_three_init_complete) {
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user