Change some CUDA configure code and macro names per review request by jsquyres in ticket #3880.
Functionally, nothing changes. This commit was SVN r29815.
This commit is contained in:
parent
231ebb09c9
commit
d556b60b21
@@ -71,19 +71,28 @@ AS_IF([test "$with_cuda" = "no" -o "x$with_cuda" = "x"],
 # If we have CUDA support, check to see if we have CUDA 4.1 support
 AS_IF([test "$opal_check_cuda_happy"="yes"],
     AC_CHECK_HEADER([$opal_cuda_incdir/cuda.h])
     AC_CHECK_MEMBER([struct CUipcMemHandle_st.reserved], [CUDA_SUPPORT_41=1], [CUDA_SUPPORT_41=0],
         [#include <$opal_cuda_incdir/cuda.h>]),
     [])

-# If we have CUDA support, check to see if we have CUDA 6.0 support.
-# Look for new CUDA 6.0 attribute.
+# If we have CUDA support, check to see if we have support for SYNC_MEMOPS
+# which was first introduced in CUDA 6.0.
 AS_IF([test "$opal_check_cuda_happy"="yes"],
     AC_CHECK_HEADER([$opal_cuda_incdir/cuda.h])
-    AC_CHECK_DECL([CU_POINTER_ATTRIBUTE_BUFFER_ID], [CUDA_SUPPORT_60=1], [CUDA_SUPPORT_60=0],
+    AC_CHECK_DECL([CU_POINTER_ATTRIBUTE_SYNC_MEMOPS], [CUDA_SYNC_MEMOPS=1], [CUDA_SYNC_MEMOPS=0],
         [#include <$opal_cuda_incdir/cuda.h>]),
     [])

+# If we have CUDA support, check to see if we have CUDA 6.0 or later.
+AC_COMPILE_IFELSE(
+    [AC_LANG_PROGRAM([[#include <$opal_cuda_incdir/cuda.h>]],
+        [[
+#if CUDA_VERSION < 6000
+#error "CUDA_VERSION is less than 6000"
+#endif
+        ]])],
+    [CUDA_VERSION_60_OR_GREATER=1],
+    [CUDA_VERSION_60_OR_GREATER=0])

 AC_MSG_CHECKING([if have cuda support])
 if test "$opal_check_cuda_happy" = "yes"; then
     AC_MSG_RESULT([yes (-I$with_cuda)])
@@ -103,8 +112,14 @@ AM_CONDITIONAL([OPAL_cuda_support_41], [test "x$CUDA_SUPPORT_41" = "x1"])
 AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT_41],$CUDA_SUPPORT_41,
                    [Whether we have CUDA 4.1 support available])

-AM_CONDITIONAL([OPAL_cuda_support_60], [test "x$CUDA_SUPPORT_60" = "x1"])
-AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT_60],$CUDA_SUPPORT_60,
-                   [Whether we have CUDA 6.0 support available])
+AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
+AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
+                   [Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])
+
+# There is nothing specific we can check for to see if GPU Direct RDMA is available.
+# Therefore, we check to see whether we have CUDA 6.0 or later.
+AM_CONDITIONAL([OPAL_cuda_gdr_support], [test "x$CUDA_VERSION_60_OR_GREATER" = "x1"])
+AC_DEFINE_UNQUOTED([OPAL_CUDA_GDR_SUPPORT],$CUDA_VERSION_60_OR_GREATER,
+                   [Whether we have CUDA GDR support available])

 ])
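For reference, the AC_COMPILE_IFELSE probe added above boils down to compiling a tiny translation unit that hard-errors on pre-6.0 toolkits. A standalone C equivalent, assuming the CUDA toolkit headers are on the include path (this is a sketch of the probe's logic, not the generated conftest source):

/* Fails to compile on toolkits older than CUDA 6.0; if this file builds,
 * configure sets CUDA_VERSION_60_OR_GREATER=1. */
#include <cuda.h>

#if CUDA_VERSION < 6000
#error "CUDA_VERSION is less than 6000"
#endif

int main(void)
{
    return 0;   /* compiling at all means CUDA >= 6.0 */
}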
@@ -76,7 +76,7 @@ int mca_btl_base_param_register(mca_base_component_t *version,
                                            OPAL_INFO_LVL_4,
                                            MCA_BASE_VAR_SCOPE_READONLY,
                                            &module->btl_eager_limit);
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
     /* If no CUDA RDMA support, zero them out */
     if (!(MCA_BTL_FLAGS_CUDA_GET & module->btl_flags)) {
         module->btl_cuda_eager_limit = 0;
@@ -92,7 +92,7 @@ int mca_btl_base_param_register(mca_base_component_t *version,
                                            OPAL_INFO_LVL_9,
                                            MCA_BASE_VAR_SCOPE_READONLY,
                                            &module->btl_cuda_rdma_limit);
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */

     (void) mca_base_component_var_register(version, "max_send_size", "Maximum size (in bytes) of a single \"phase 2\" fragment of a long message when using the pipeline protocol (must be >= 1)",
                                            MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
@@ -843,10 +843,10 @@ struct mca_btl_base_module_t {
     mca_btl_base_module_register_error_fn_t btl_register_error;
     /** fault tolerant even notification */
     mca_btl_base_module_ft_event_fn_t btl_ft_event;
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
     size_t btl_cuda_eager_limit; /**< switch from eager to RDMA */
     size_t btl_cuda_rdma_limit;  /**< switch from RDMA to rndv pipeline */
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 };
 typedef struct mca_btl_base_module_t mca_btl_base_module_t;
@@ -1219,11 +1219,11 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(

     openib_btl = (mca_btl_openib_module_t*)btl;

-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
     if(opal_convertor_cuda_need_buffers(convertor) == false && 0 == reserve) {
 #else
     if(opal_convertor_need_buffers(convertor) == false && 0 == reserve) {
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
         /* GMS bloody HACK! */
         if(registration != NULL || max_data > btl->btl_max_send_size) {
             frag = alloc_send_user_frag();
@@ -1382,11 +1382,11 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
      * register the region ourselves
      */
     uint32_t mflags = 0;
-#if OPAL_CUDA_SUPPORT
+#if OPAL_CUDA_GDR_SUPPORT
     if (convertor->flags & CONVERTOR_CUDA) {
         mflags |= MCA_MPOOL_FLAGS_CUDA_GPU_MEM;
     }
-#endif /* OPAL_CUDA_SUPPORT */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
     rc = btl->btl_mpool->mpool_register(btl->btl_mpool, buffer, *size, mflags,
                                         &registration);
     if(OMPI_SUCCESS != rc || NULL == registration) {
@@ -582,7 +582,7 @@ int btl_openib_register_mca_params(void)
     mca_btl_openib_component.use_message_coalescing = 0;

     /* Indicates if library was built with GPU Direct RDMA support.  Not changeable.  */
-    mca_btl_openib_component.cuda_have_gdr = OPAL_INT_TO_BOOL(OPAL_CUDA_SUPPORT_60);
+    mca_btl_openib_component.cuda_have_gdr = OPAL_INT_TO_BOOL(OPAL_CUDA_GDR_SUPPORT);
     (void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version, "have_cuda_gdr_support",
                                            "Whether CUDA GPU Direct RDMA support is built into library or not",
                                            MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
@@ -601,7 +601,7 @@ int btl_openib_register_mca_params(void)
             opal_output(0, "GDR support requested but library does not have it built in.");
             return OMPI_ERROR;
         }
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
         if (mca_btl_openib_component.cuda_want_gdr) {
             mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_CUDA_GET;
             mca_btl_openib_module.super.btl_cuda_eager_limit = SIZE_MAX; /* magic number - indicates set it to minimum */
@@ -610,7 +610,7 @@ int btl_openib_register_mca_params(void)
             mca_btl_openib_module.super.btl_cuda_eager_limit = 0; /* Turns off any of the GPU Direct RDMA code */
             mca_btl_openib_module.super.btl_cuda_rdma_limit = 0;  /* Unused */
         }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 #endif /* OPAL_CUDA_SUPPORT */
         CHECK(mca_btl_base_param_register(
                   &mca_btl_openib_component.super.btl_version,
@@ -91,9 +91,9 @@ struct cudaFunctionTable {
     int (*cuCtxGetDevice)(CUdevice *);
     int (*cuDeviceCanAccessPeer)(int *, CUdevice, CUdevice);
     int (*cuDeviceGet)(CUdevice *, int);
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
     int (*cuPointerSetAttribute)(const void *, CUpointer_attribute, CUdeviceptr);
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
     int (*cuCtxSetCurrent)(CUcontext);
 } cudaFunctionTable;
 typedef struct cudaFunctionTable cudaFunctionTable_t;
@@ -452,9 +452,9 @@ int mca_common_cuda_stage_one_init(void)
     OMPI_CUDA_DLSYM(libcuda_handle, cuCtxGetDevice);
     OMPI_CUDA_DLSYM(libcuda_handle, cuDeviceCanAccessPeer);
     OMPI_CUDA_DLSYM(libcuda_handle, cuDeviceGet);
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
     OMPI_CUDA_DLSYM(libcuda_handle, cuPointerSetAttribute);
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
     OMPI_CUDA_DLSYM(libcuda_handle, cuCtxSetCurrent);
     return 0;
 }
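The two hunks above extend the driver-API function table that Open MPI fills in at runtime; OMPI_CUDA_DLSYM wraps the usual dlsym() lookup. A minimal sketch of that pattern (the loader function, handle usage, and error handling here are illustrative, not the OMPI macro's internals):

/* Sketch of the dlopen/dlsym function-table pattern used above. */
#include <dlfcn.h>
#include <stdio.h>

/* Hypothetical stand-in for one cudaFunctionTable slot. */
typedef int (*cuCtxSetCurrent_fn)(void *);

static cuCtxSetCurrent_fn my_cuCtxSetCurrent;

static int load_cuda_symbol(void)
{
    /* The CUDA driver library; the soname may differ across installs. */
    void *handle = dlopen("libcuda.so.1", RTLD_LAZY);
    if (NULL == handle) {
        fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return -1;
    }
    my_cuCtxSetCurrent = (cuCtxSetCurrent_fn)dlsym(handle, "cuCtxSetCurrent");
    if (NULL == my_cuCtxSetCurrent) {
        fprintf(stderr, "dlsym failed: %s\n", dlerror());
        return -1;
    }
    return 0;
}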
@@ -842,7 +842,7 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
     cuda_reg->base.bound = (unsigned char *)pbase + psize - 1;
     memcpy(&cuda_reg->memHandle, &memHandle, sizeof(memHandle));

-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_SYNC_MEMOPS
     /* With CUDA 6.0, we can set an attribute on the memory pointer that will
      * ensure any synchronous copies are completed prior to any other access
      * of the memory region.  This means we do not need to record an event
@@ -870,7 +870,7 @@ int cuda_getmemhandle(void *base, size_t size, mca_mpool_base_registration_t *ne
                        true, result, base);
         return OMPI_ERROR;
     }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_SYNC_MEMOPS */

     return OMPI_SUCCESS;
 }
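The SYNC_MEMOPS behavior that the renamed guard now advertises is enabled through cuPointerSetAttribute(). A minimal sketch of that call, assuming a CUDA 6.0+ toolkit and an already-initialized driver context (the helper name is illustrative, not Open MPI code):

/* Mark a device allocation so that synchronous memory operations
 * targeting it complete before any other access to the region. */
#include <cuda.h>
#include <stdio.h>

static int set_sync_memops(CUdeviceptr dbuf)
{
    int enable = 1;
    CUresult res = cuPointerSetAttribute(&enable,
                                         CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
                                         dbuf);
    if (CUDA_SUCCESS != res) {
        fprintf(stderr, "cuPointerSetAttribute failed: %d\n", (int)res);
        return -1;
    }
    return 0;
}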
@@ -994,10 +994,10 @@ void mca_common_cuda_destruct_event(uint64_t *event)
  */
 void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg)
 {
-#if OPAL_CUDA_SUPPORT_60
-    /* No need for any of this with CUDA 6.0 */
+#if OPAL_CUDA_SYNC_MEMOPS
+    /* No need for any of this with SYNC_MEMOPS feature */
     return;
-#else /* OPAL_CUDA_SUPPORT_60 */
+#else /* OPAL_CUDA_SYNC_MEMOPS */
     CUipcEventHandle evtHandle;
     CUevent event;
     CUresult result;
@@ -1035,7 +1035,7 @@ void mca_common_wait_stream_synchronize(mca_mpool_common_cuda_reg_t *rget_reg)
         opal_show_help("help-mpi-common-cuda.txt", "cuEventDestroy failed",
                        true, result);
     }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_SYNC_MEMOPS */
 }

 /*
@@ -1644,7 +1644,7 @@ int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base)
     return 0;
 }

-#if OPAL_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
+#if OPAL_CUDA_GDR_SUPPORT && OMPI_GDR_SUPPORT
 /* Check to see if the memory was freed between the time it was stored in
  * the registration cache and now.  Return true if the memory was previously
  * freed.  This is indicated by the BUFFER_ID value in the registration cache
@@ -1707,5 +1707,5 @@ void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg)


 }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
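The freed-memory check these guards enclose relies on CU_POINTER_ATTRIBUTE_BUFFER_ID, a per-allocation ID that changes when memory is cuMemFree'd and a new allocation reuses the address. A minimal sketch of the comparison, assuming a CUDA 6.0+ toolkit (the helper name is illustrative; Open MPI's check_for_cuda_freed_memory() additionally evicts stale registrations from its cache, per the comment above):

/* Detect whether a cached registration refers to memory that was
 * cuMemFree'd and re-cuMemAlloc'd: a mismatch between the cached
 * BUFFER_ID and the current one means the old buffer is gone. */
#include <cuda.h>
#include <stdbool.h>

static bool previously_freed(unsigned long long cached_id, CUdeviceptr dbuf)
{
    unsigned long long current_id = 0;
    CUresult res = cuPointerGetAttribute(&current_id,
                                         CU_POINTER_ATTRIBUTE_BUFFER_ID,
                                         dbuf);
    /* An error here typically means the pointer no longer maps to any
     * live allocation, which also counts as "freed". */
    return (CUDA_SUCCESS != res) || (current_id != cached_id);
}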
@@ -75,10 +75,10 @@ OMPI_DECLSPEC int mca_common_cuda_get_device(int *devicenum);
 OMPI_DECLSPEC int mca_common_cuda_device_can_access_peer(int *access, int dev1, int dev2);
 OMPI_DECLSPEC int mca_common_cuda_stage_one_init(void);
 OMPI_DECLSPEC int mca_common_cuda_get_address_range(void *pbase, size_t *psize, void *base);
-#if OPAL_CUDA_SUPPORT_60 && OMPI_GDR_SUPPORT
+#if OPAL_CUDA_GDR_SUPPORT && OMPI_GDR_SUPPORT
 OMPI_DECLSPEC bool mca_common_cuda_previously_freed_memory(mca_mpool_base_registration_t *reg);
 OMPI_DECLSPEC void mca_common_cuda_get_buffer_id(mca_mpool_base_registration_t *reg);
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 /**
  * Return: 0 if no packing is required for sending (the upper layer
  *         can use directly the pointer to the contiguous user
@@ -36,9 +36,9 @@

 #include "opal/align.h"

-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 #include "ompi/mca/common/cuda/common_cuda.h"
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 #include "ompi/mca/rcache/rcache.h"
 #include "ompi/mca/rcache/base/base.h"
 #include "ompi/mca/rte/rte.h"
@@ -47,9 +47,9 @@
 #include "ompi/mca/mpool/base/base.h"
 #include "mpool_grdma.h"

-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *addr, size_t size);
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 static void mca_mpool_grdma_pool_contructor (mca_mpool_grdma_pool_t *pool)
 {
     memset ((void *)((uintptr_t)pool + sizeof (pool->super)), 0, sizeof (*pool) - sizeof (pool->super));
@@ -236,7 +236,7 @@ int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
     if (!opal_list_is_empty (&mpool_grdma->pool->gc_list))
         do_unregistration_gc(mpool);

-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
     if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
         size_t psize;
         mca_common_cuda_get_address_range(&base, &psize, addr);
@@ -245,7 +245,7 @@ int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
      * this call will boot it out of the cache. */
         check_for_cuda_freed_memory(mpool, base, psize);
     }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */

     /* look through existing regs if not persistent registration requested.
      * Persistent registration are always registered and placed in the cache */
@@ -287,11 +287,11 @@ int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
     grdma_reg->base = base;
     grdma_reg->bound = bound;
     grdma_reg->flags = flags;
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
     if (flags & MCA_MPOOL_FLAGS_CUDA_GPU_MEM) {
         mca_common_cuda_get_buffer_id(grdma_reg);
     }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */

     if (false == bypass_cache) {
         rc = mpool->rcache->rcache_insert(mpool->rcache, grdma_reg, 0);
@@ -466,7 +466,7 @@ int mca_mpool_grdma_release_memory(struct mca_mpool_base_module_t *mpool,
  * that we do not have a cuMemAlloc, cuMemFree, cuMemAlloc state.  If we do
  * kick out the regisrations and deregister.  This function needs to be called
  * with the mpool->rcache->lock held. */
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
 static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *addr, size_t size)
 {
     mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) mpool;
@@ -515,7 +515,7 @@ static int check_for_cuda_freed_memory(mca_mpool_base_module_t *mpool, void *add

     return rc;
 }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */

 void mca_mpool_grdma_finalize(struct mca_mpool_base_module_t *mpool)
 {
@@ -57,9 +57,9 @@ struct mca_mpool_base_registration_t {
     int32_t ref_count;
     uint32_t flags;
     void *mpool_context;
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
     unsigned long long gpu_bufID;
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
 };

 typedef struct mca_mpool_base_registration_t mca_mpool_base_registration_t;
@@ -366,7 +366,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
             rc = OMPI_ERR_BAD_PARAM;
             goto cleanup_and_return;
         }
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
         /* If size is SIZE_MAX, then we know we want to set this to the minimum possible
          * value which is the size of the PML header. */
         if (SIZE_MAX == sm->btl_module->btl_cuda_eager_limit) {
@@ -405,7 +405,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
                 goto cleanup_and_return;
             }
         }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */
     }

@@ -54,13 +54,13 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq,
                                         size_t size) {
     int rc;
 #if OPAL_CUDA_SUPPORT_41
-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
     /* With some BTLs, switch to RNDV from RGET at large messages */
     if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) &&
         (sendreq->req_send.req_bytes_packed > (bml_btl->btl->btl_cuda_rdma_limit - sizeof(mca_pml_ob1_hdr_t)))) {
         return mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0);
     }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */

     sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA;
     if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {
@@ -366,11 +366,11 @@ mca_pml_ob1_send_request_start_btl( mca_pml_ob1_send_request_t* sendreq,
     size_t eager_limit = btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t);
     int rc;

-#if OPAL_CUDA_SUPPORT_60
+#if OPAL_CUDA_GDR_SUPPORT
     if (btl->btl_cuda_eager_limit && (sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA)) {
         eager_limit = btl->btl_cuda_eager_limit - sizeof(mca_pml_ob1_hdr_t);
     }
-#endif /* OPAL_CUDA_SUPPORT_60 */
+#endif /* OPAL_CUDA_GDR_SUPPORT */

     if( OPAL_LIKELY(size <= eager_limit) ) {
         switch(sendreq->req_send.req_send_mode) {