1
1

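Add an optimization that can be used once CUDA 6.0 is released: set the new CU_POINTER_ATTRIBUTE_SYNC_MEMOPS pointer attribute instead of recording an event and sending it to the remote side.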

This commit was SVN r29514.
Этот коммит содержится в:
Rolf vandeVaart 2013-10-24 21:17:58 +00:00
родитель d152ebff9e
Коммит fa5d20a5ec
2 изменённых файлов: 34 добавлений и 0 удалений

Просмотреть файл

@ -91,6 +91,9 @@ struct cudaFunctionTable {
int (*cuCtxGetDevice)(CUdevice *);
int (*cuDeviceCanAccessPeer)(int *, CUdevice, CUdevice);
int (*cuDeviceGet)(CUdevice *, int);
#if OMPI_CUDA_SUPPORT_60
int (*cuPointerSetAttribute)(const void *, CUpointer_attribute, CUdeviceptr);
#endif /* OMPI_CUDA_SUPPORT_60 */
} cudaFunctionTable;
typedef struct cudaFunctionTable cudaFunctionTable_t;
cudaFunctionTable_t cuFunc;
@ -446,6 +449,9 @@ int mca_common_cuda_stage_one_init(void)
OMPI_CUDA_DLSYM(libcuda_handle, cuCtxGetDevice);
OMPI_CUDA_DLSYM(libcuda_handle, cuDeviceCanAccessPeer);
OMPI_CUDA_DLSYM(libcuda_handle, cuDeviceGet);
#if OMPI_CUDA_SUPPORT_60
OMPI_CUDA_DLSYM(libcuda_handle, cuPointerSetAttribute);
#endif /* OMPI_CUDA_SUPPORT_60 */
return 0;
}
@ -832,6 +838,21 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
cuda_reg->base.bound = (unsigned char *)pbase + psize - 1;
memcpy(&cuda_reg->memHandle, &memHandle, sizeof(memHandle));
#if OMPI_CUDA_SUPPORT_60
/* With CUDA 6.0, we can set an attribute on the memory pointer that will
* ensure any synchronous copies are completed prior to any other access
* of the memory region. This means we do not need to record an event
* and send to the remote side.
*/
memType = 1; /* Just use this variable since we already have it */
result = cuFunc.cuPointerSetAttribute(&memType, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
(CUdeviceptr)base);
if (CUDA_SUCCESS != result) {
opal_show_help("help-mpi-common-cuda.txt", "cuPointerSetAttribute failed",
true, result, base);
return OMPI_ERROR;
}
#else
/* Need to record the event to ensure that any memcopies into the
* device memory have completed. The event handle associated with
* this event is sent to the remote process so that it will wait
@ -845,6 +866,7 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
true, result, base);
return OMPI_ERROR;
}
#endif /* OMPI_CUDA_SUPPORT_60 */
return OMPI_SUCCESS;
}
@ -968,6 +990,10 @@ void mca_common_cuda_destruct_event(uint64_t *event)
*/
void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg)
{
#if OMPI_CUDA_SUPPORT_60
/* No need for any of this with CUDA 6.0 */
return;
#else /* OMPI_CUDA_SUPPORT_60 */
CUipcEventHandle evtHandle;
CUevent event;
CUresult result;
@ -1005,6 +1031,7 @@ void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg)
opal_show_help("help-mpi-common-cuda.txt", "cuEventDestroy failed",
true, result);
}
#endif /* OMPI_CUDA_SUPPORT_60 */
}
/*

Просмотреть файл

@ -172,3 +172,10 @@ could cause incorrect results. Turn off GPU Direct RDMA support by running with
--mca btl_openib_cuda_want_gdr_support 0.
cuPointerGetAttribute return value: %d
Check the cuda.h file for what the return value means.
[cuPointerSetAttribute failed]
The call to cuPointerSetAttribute failed. This is an unrecoverable error and will
cause the program to abort.
cuPointerSetAttribute return value: %d
Address: %p
Check the cuda.h file for what the return value means.
#