
Change some CUDA configure code and macro names per review request by jsquyres in ticket #3880.

Functionally, nothing changes.

This commit was SVN r29815.
This commit is contained in:
Rolf vandeVaart 2013-12-06 14:35:10 +00:00
parent 231ebb09c9
commit d556b60b21
12 changed files with 66 additions and 51 deletions
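
For orientation: the rename replaces the single version-based guard with two feature-specific ones. Below is a hypothetical excerpt of the generated opal_config.h after this change; the values of 1 are illustrative only, since the real values are set by the configure probes shown in the first hunk.

/* Hypothetical excerpt of the generated opal_config.h; values come from configure. */
#define OPAL_CUDA_SUPPORT_41  1  /* struct CUipcMemHandle_st has "reserved" (CUDA >= 4.1) */
#define OPAL_CUDA_SYNC_MEMOPS 1  /* CU_POINTER_ATTRIBUTE_SYNC_MEMOPS is declared */
#define OPAL_CUDA_GDR_SUPPORT 1  /* CUDA_VERSION >= 6000, used as a proxy for GPU Direct RDMA */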

View file

@@ -71,19 +71,28 @@ AS_IF([test "$with_cuda" = "no" -o "x$with_cuda" = "x"],
 # If we have CUDA support, check to see if we have CUDA 4.1 support
 AS_IF([test "$opal_check_cuda_happy"="yes"],
 AC_CHECK_HEADER([$opal_cuda_incdir/cuda.h])
 AC_CHECK_MEMBER([struct CUipcMemHandle_st.reserved], [CUDA_SUPPORT_41=1], [CUDA_SUPPORT_41=0],
 [#include <$opal_cuda_incdir/cuda.h>]),
 [])
-# If we have CUDA support, check to see if we have CUDA 6.0 support.
-# Look for new CUDA 6.0 attribute.
+# If we have CUDA support, check to see if we have support for SYNC_MEMOPS
+# which was first introduced in CUDA 6.0.
 AS_IF([test "$opal_check_cuda_happy"="yes"],
 AC_CHECK_HEADER([$opal_cuda_incdir/cuda.h])
-AC_CHECK_DECL([CU_POINTER_ATTRIBUTE_BUFFER_ID], [CUDA_SUPPORT_60=1], [CUDA_SUPPORT_60=0],
+AC_CHECK_DECL([CU_POINTER_ATTRIBUTE_SYNC_MEMOPS], [CUDA_SYNC_MEMOPS=1], [CUDA_SYNC_MEMOPS=0],
 [#include <$opal_cuda_incdir/cuda.h>]),
 [])
+# If we have CUDA support, check to see if we have CUDA 6.0 or later.
+AC_COMPILE_IFELSE(
+[AC_LANG_PROGRAM([[#include <$opal_cuda_incdir/cuda.h>]],
+[[
+#if CUDA_VERSION < 6000
+#error "CUDA_VERSION is less than 6000"
+#endif
+]])],
+[CUDA_VERSION_60_OR_GREATER=1],
+[CUDA_VERSION_60_OR_GREATER=0])
 AC_MSG_CHECKING([if have cuda support])
 if test "$opal_check_cuda_happy" = "yes"; then
 AC_MSG_RESULT([yes (-I$with_cuda)])
@@ -103,8 +112,14 @@ AM_CONDITIONAL([OPAL_cuda_support_41], [test "x$CUDA_SUPPORT_41" = "x1"])
 AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT_41],$CUDA_SUPPORT_41,
 [Whether we have CUDA 4.1 support available])
-AM_CONDITIONAL([OPAL_cuda_support_60], [test "x$CUDA_SUPPORT_60" = "x1"])
-AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT_60],$CUDA_SUPPORT_60,
-[Whether we have CUDA 6.0 support available])
+AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
+AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
+[Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])
+# There is nothing specific we can check for to see if GPU Direct RDMA is available.
+# Therefore, we check to see whether we have CUDA 6.0 or later.
+AM_CONDITIONAL([OPAL_cuda_gdr_support], [test "x$CUDA_VERSION_60_OR_GREATER" = "x1"])
+AC_DEFINE_UNQUOTED([OPAL_CUDA_GDR_SUPPORT],$CUDA_VERSION_60_OR_GREATER,
+[Whether we have CUDA GDR support available])
 ])
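
The AC_COMPILE_IFELSE probe above can be reproduced standalone. The following C translation unit (assuming cuda.h is on the include path) compiles only against a CUDA 6.0-or-newer toolkit, which is exactly the condition that sets CUDA_VERSION_60_OR_GREATER:

/* Compiles only with a CUDA >= 6.0 toolkit, mirroring the configure probe. */
#include <cuda.h>

#if CUDA_VERSION < 6000
#error "CUDA_VERSION is less than 6000"
#endif

int main(void) { return 0; }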

View file

@@ -76,7 +76,7 @@ int mca_btl_base_param_register(mca_base_component_t *version,
 OPAL_INFO_LVL_4,
 MCA_BASE_VAR_SCOPE_READONLY,
 &module->btl_eager_limit);
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 /* If no CUDA RDMA support, zero them out */
 if (!(MCA_BTL_FLAGS_CUDA_GET & module->btl_flags)) {
 module->btl_cuda_eager_limit = 0;
@@ -92,7 +92,7 @@ int mca_btl_base_param_register(mca_base_component_t *version,
 OPAL_INFO_LVL_9,
 MCA_BASE_VAR_SCOPE_READONLY,
 &module->btl_cuda_rdma_limit);
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 (void) mca_base_component_var_register(version, "max_send_size", "Maximum size (in bytes) of a single \"phase 2\" fragment of a long message when using the pipeline protocol (must be >= 1)",
 MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,

View file

@@ -843,10 +843,10 @@ struct mca_btl_base_module_t {
 mca_btl_base_module_register_error_fn_t btl_register_error;
 /** fault tolerant even notification */
 mca_btl_base_module_ft_event_fn_t btl_ft_event;
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 size_t btl_cuda_eager_limit; /**< switch from eager to RDMA */
 size_t btl_cuda_rdma_limit; /**< switch from RDMA to rndv pipeline */
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 };
 typedef struct mca_btl_base_module_t mca_btl_base_module_t;
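
A hedged sketch (not code from this commit) of how the two limits are meant to partition message sizes for CUDA buffers, per the field comments above and the ob1 hunks later in this diff:

/* Illustrative only: eager below btl_cuda_eager_limit, RDMA get up to
 * btl_cuda_rdma_limit, rendezvous pipeline beyond that. */
#include <stddef.h>

typedef enum { PROTO_EAGER, PROTO_RDMA_GET, PROTO_RNDV_PIPELINE } cuda_proto_t;

static cuda_proto_t choose_cuda_protocol(size_t msg_size,
                                         size_t cuda_eager_limit,
                                         size_t cuda_rdma_limit)
{
    if (msg_size <= cuda_eager_limit) {
        return PROTO_EAGER;          /* copy through eager buffers */
    } else if (msg_size <= cuda_rdma_limit) {
        return PROTO_RDMA_GET;       /* GPU Direct RDMA (RGET) */
    }
    return PROTO_RNDV_PIPELINE;      /* pipelined rendezvous */
}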

View file

@@ -1219,11 +1219,11 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
 openib_btl = (mca_btl_openib_module_t*)btl;
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 if(opal_convertor_cuda_need_buffers(convertor) == false && 0 == reserve) {
 #else
 if(opal_convertor_need_buffers(convertor) == false && 0 == reserve) {
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 /* GMS bloody HACK! */
 if(registration != NULL || max_data > btl->btl_max_send_size) {
 frag = alloc_send_user_frag();
@@ -1382,11 +1382,11 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
 * register the region ourselves
 */
 uint32_t mflags = 0;
-#if OPAL_CUDA_SUPPORT
+#if OPAL_CUDA_GDR_SUPPORT
 if (convertor->flags & CONVERTOR_CUDA) {
 mflags |= MCA_MPOOL_FLAGS_CUDA_GPU_MEM;
 }
-#endif /* OPAL_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 rc = btl->btl_mpool->mpool_register(btl->btl_mpool, buffer, *size, mflags,
 &registration);
 if(OMPI_SUCCESS != rc || NULL == registration) {

View file

@@ -582,7 +582,7 @@ int btl_openib_register_mca_params(void)
 mca_btl_openib_component.use_message_coalescing = 0;
 /* Indicates if library was built with GPU Direct RDMA support. Not changeable. */
-mca_btl_openib_component.cuda_have_gdr = OPAL_INT_TO_BOOL(OPAL_CUDA_SUPPORT_60);
+mca_btl_openib_component.cuda_have_gdr = OPAL_INT_TO_BOOL(OPAL_CUDA_GDR_SUPPORT);
 (void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version, "have_cuda_gdr_support",
 "Whether CUDA GPU Direct RDMA support is built into library or not",
 MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
@@ -601,7 +601,7 @@ int btl_openib_register_mca_params(void)
 opal_output(0, "GDR support requested but library does not have it built in.");
 return OMPI_ERROR;
 }
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 if (mca_btl_openib_component.cuda_want_gdr) {
 mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_CUDA_GET;
 mca_btl_openib_module.super.btl_cuda_eager_limit = SIZE_MAX; /* magic number - indicates set it to minimum */
@@ -610,7 +610,7 @@ int btl_openib_register_mca_params(void)
 mca_btl_openib_module.super.btl_cuda_eager_limit = 0; /* Turns off any of the GPU Direct RDMA code */
 mca_btl_openib_module.super.btl_cuda_rdma_limit = 0; /* Unused */
 }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 #endif /* OPAL_CUDA_SUPPORT */
 CHECK(mca_btl_base_param_register(
 &mca_btl_openib_component.super.btl_version,

View file

@@ -91,9 +91,9 @@ struct cudaFunctionTable {
 int (*cuCtxGetDevice)(CUdevice *);
 int (*cuDeviceCanAccessPeer)(int *, CUdevice, CUdevice);
 int (*cuDeviceGet)(CUdevice *, int);
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 int (*cuPointerSetAttribute)(const void *, CUpointer_attribute, CUdeviceptr);
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 int (*cuCtxSetCurrent)(CUcontext);
 } cudaFunctionTable;
 typedef struct cudaFunctionTable cudaFunctionTable_t;
@@ -452,9 +452,9 @@ int mca_common_cuda_stage_one_init(void)
 OMPI_CUDA_DLSYM(libcuda_handle, cuCtxGetDevice);
 OMPI_CUDA_DLSYM(libcuda_handle, cuDeviceCanAccessPeer);
 OMPI_CUDA_DLSYM(libcuda_handle, cuDeviceGet);
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 OMPI_CUDA_DLSYM(libcuda_handle, cuPointerSetAttribute);
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 OMPI_CUDA_DLSYM(libcuda_handle, cuCtxSetCurrent);
 return 0;
 }
@@ -842,7 +842,7 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
 cuda_reg->base.bound = (unsigned char *)pbase + psize - 1;
 memcpy(&cuda_reg->memHandle, &memHandle, sizeof(memHandle));
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_SYNC_MEMOPS
 /* With CUDA 6.0, we can set an attribute on the memory pointer that will
 * ensure any synchronous copies are completed prior to any other access
 * of the memory region. This means we do not need to record an event
@@ -870,7 +870,7 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
 true, result, base);
 return OMPI_ERROR;
 }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_SYNC_MEMOPS */
 return OMPI_SUCCESS;
 }
@@ -994,10 +994,10 @@ void mca_common_cuda_destruct_event(uint64_t *event)
 */
 void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg)
 {
-#if OPAL_CUDA_SUPPORT_60
-/* No need for any of this with CUDA 6.0 */
+#if OPAL_CUDA_SYNC_MEMOPS
+/* No need for any of this with SYNC_MEMOPS feature */
 return;
-#else /* OPAL_CUDA_SUPPORT_60 */
+#else /* OPAL_CUDA_SYNC_MEMOPS */
 CUipcEventHandle evtHandle;
 CUevent event;
 CUresult result;
@@ -1035,7 +1035,7 @@ void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg)
 opal_show_help("help-mpi-common-cuda.txt", "cuEventDestroy failed",
 true, result);
 }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_SYNC_MEMOPS */
 }
 /*
@@ -1644,7 +1644,7 @@ int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base)
 return 0;
 }
-#if OPAL_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
+#if OPAL_CUDA_GDR_SUPPORT && OMPI_GDR_SUPPORT
 /* Check to see if the memory was freed between the time it was stored in
 * the registration cache and now. Return true if the memory was previously
 * freed. This is indicated by the BUFFER_ID value in the registration cache
@@ -1707,5 +1707,5 @@ void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg)
 }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
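
The SYNC_MEMOPS attribute that replaces the IPC-event handshake can be set with the CUDA driver API directly. A minimal sketch, assuming a buffer already allocated with cuMemAlloc() (the code in this commit goes through Open MPI's dlopen()ed function table instead of calling the driver directly):

#include <cuda.h>

/* Force synchronous copies on this allocation to complete before any
 * other access to the region, per the comment in the hunk above. */
static CUresult enable_sync_memops(CUdeviceptr dbuf)
{
    unsigned int enable = 1;
    return cuPointerSetAttribute(&enable,
                                 CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
                                 dbuf);
}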

View file

@@ -75,10 +75,10 @@ OMPI_DECLSPEC int mca_common_cuda_get_device(int *devicenum);
 OMPI_DECLSPEC int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2);
 OMPI_DECLSPEC int mca_common_cuda_stage_one_init(void);
 OMPI_DECLSPEC int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base);
-#if OPAL_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
+#if OPAL_CUDA_GDR_SUPPORT && OMPI_GDR_SUPPORT
 OMPI_DECLSPEC bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg);
 OMPI_DECLSPEC void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg);
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 /**
 * Return: 0 if no packing is required for sending (the upper layer
 * can use directly the pointer to the contiguous user

View file

@@ -36,9 +36,9 @@
 #include "opal/align.h"
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 #include "ompi/mca/common/cuda/common_cuda.h"
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 #include "ompi/mca/rcache/rcache.h"
 #include "ompi/mca/rcache/base/base.h"
 #include "ompi/mca/rte/rte.h"
@@ -47,9 +47,9 @@
 #include "ompi/mca/mpool/base/base.h"
 #include "mpool_grdma.h"
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *addr, size_t size);
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 static void mca_mpool_grdma_pool_contructor (mca_mpool_grdma_pool_t *pool)
 {
 memset ((void *)((uintptr_t)pool + sizeof (pool->super)), 0, sizeof (*pool) - sizeof (pool->super));
@@ -236,7 +236,7 @@ int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
 if (!opal_list_is_empty (&mpool_grdma->pool->gc_list))
 do_unregistration_gc(mpool);
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
 size_t psize;
 mca_common_cuda_get_address_range(&base, &psize, addr);
@@ -245,7 +245,7 @@ int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
 * this call will boot it out of the cache. */
 check_for_cuda_freed_memory(mpool, base, psize);
 }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 /* look through existing regs if not persistent registration requested.
 * Persistent registration are always registered and placed in the cache */
@@ -287,11 +287,11 @@ int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
 grdma_reg->base = base;
 grdma_reg->bound = bound;
 grdma_reg->flags = flags;
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
 mca_common_cuda_get_buffer_id(grdma_reg);
 }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 if (false == bypass_cache) {
 rc = mpool->rcache->rcache_insert(mpool->rcache, grdma_reg, 0);
@@ -466,7 +466,7 @@ int mca_mpool_grdma_release_memory(struct mca_mpool_base_module_t *mpool,
 * that we do not have a cuMemAlloc, cuMemFree, cuMemAlloc state. If we do
 * kick out the regisrations and deregister. This function needs to be called
 * with the mpool->rcache->lock held. */
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *addr, size_t size)
 {
 mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) mpool;
@@ -515,7 +515,7 @@ static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *add
 return rc;
 }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 void mca_mpool_grdma_finalize(struct mca_mpool_base_module_t *mpool)
 {
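
The freed-memory test that check_for_cuda_freed_memory() relies on compares buffer IDs. A hedged sketch using the driver API directly (the real code goes through mca_common_cuda_previously_freed_memory() and the gpu_bufID cached in the registration):

#include <cuda.h>
#include <stdbool.h>

/* CU_POINTER_ATTRIBUTE_BUFFER_ID changes when an address range is
 * re-allocated, so a mismatch with the cached ID means the original
 * allocation was freed (a cuMemAlloc, cuMemFree, cuMemAlloc cycle). */
static bool buffer_was_freed(CUdeviceptr addr, unsigned long long cached_id)
{
    unsigned long long current_id = 0;
    CUresult res = cuPointerGetAttribute(&current_id,
                                         CU_POINTER_ATTRIBUTE_BUFFER_ID,
                                         addr);
    /* A lookup failure (e.g. pointer no longer valid) also means freed. */
    return (CUDA_SUCCESS != res) || (current_id != cached_id);
}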

View file

@@ -57,9 +57,9 @@ struct mca_mpool_base_registration_t {
 int32_t ref_count;
 uint32_t flags;
 void *mpool_context;
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 unsigned long long gpu_bufID;
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 };
 typedef struct mca_mpool_base_registration_t mca_mpool_base_registration_t;

View file

@@ -366,7 +366,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
 rc = OMPI_ERR_BAD_PARAM;
 goto cleanup_and_return;
 }
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 /* If size is SIZE_MAX, then we know we want to set this to the minimum possible
 * value which is the size of the PML header. */
 if (SIZE_MAX == sm->btl_module->btl_cuda_eager_limit) {
@@ -405,7 +405,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
 goto cleanup_and_return;
 }
 }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 }

View file

@@ -54,13 +54,13 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq,
 size_t size) {
 int rc;
 #if OPAL_CUDA_SUPPORT_41
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 /* With some BTLs, switch to RNDV from RGET at large messages */
 if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) &&
 (sendreq->req_send.req_bytes_packed > (bml_btl->btl->btl_cuda_rdma_limit - sizeof(mca_pml_ob1_hdr_t)))) {
 return mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0);
 }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA;
 if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {

View file

@@ -366,11 +366,11 @@ mca_pml_ob1_send_request_start_btl( mca_pml_ob1_send_request_t* sendreq,
 size_t eager_limit = btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t);
 int rc;
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 if (btl->btl_cuda_eager_limit && (sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA)) {
 eager_limit = btl->btl_cuda_eager_limit - sizeof(mca_pml_ob1_hdr_t);
 }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 if( OPAL_LIKELY(size <= eager_limit) ) {
 switch(sendreq->req_send.req_send_mode) {