Make CUDA 4.1 a requirement for CUDA-aware support.
Remove all related preprocessor conditionals.
Этот коммит содержится в:
родитель
8ad9b450c4
Коммит
f2ff6e03ab
@ -16,7 +16,7 @@ dnl Copyright (c) 2009 IBM Corporation. All rights reserved.
|
|||||||
dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights
|
dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights
|
||||||
dnl reserved.
|
dnl reserved.
|
||||||
dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved.
|
dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved.
|
||||||
dnl Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved.
|
dnl Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved.
|
||||||
dnl Copyright (c) 2015 Research Organization for Information Science
|
dnl Copyright (c) 2015 Research Organization for Information Science
|
||||||
dnl and Technology (RIST). All rights reserved.
|
dnl and Technology (RIST). All rights reserved.
|
||||||
dnl
|
dnl
|
||||||
@ -79,10 +79,13 @@ dnl common framework, and likely configured first). So we have to
|
|||||||
dnl defer this check until later (see the OPAL_CHECK_CUDA_AFTER_OPAL_DL m4
|
dnl defer this check until later (see the OPAL_CHECK_CUDA_AFTER_OPAL_DL m4
|
||||||
dnl macro, below). :-(
|
dnl macro, below). :-(
|
||||||
|
|
||||||
# If we have CUDA support, check to see if we have CUDA 4.1 support
|
# We require CUDA IPC support which started in CUDA 4.1. Error
|
||||||
AS_IF([test "$opal_check_cuda_happy"="yes"],
|
# out if the support is not there.
|
||||||
AC_CHECK_MEMBER([struct CUipcMemHandle_st.reserved], [CUDA_SUPPORT_41=1], [CUDA_SUPPORT_41=0],
|
AS_IF([test "$opal_check_cuda_happy" = "yes"],
|
||||||
[#include <$opal_cuda_incdir/cuda.h>]),
|
[AC_CHECK_MEMBER([struct CUipcMemHandle_st.reserved],
|
||||||
|
[],
|
||||||
|
[AC_MSG_ERROR([Cannot continue because CUDA 4.1 or later is required])],
|
||||||
|
[#include <$opal_cuda_incdir/cuda.h>])],
|
||||||
[])
|
[])
|
||||||
|
|
||||||
# If we have CUDA support, check to see if we have support for SYNC_MEMOPS
|
# If we have CUDA support, check to see if we have support for SYNC_MEMOPS
|
||||||
@ -125,10 +128,6 @@ AM_CONDITIONAL([OPAL_cuda_support], [test "x$CUDA_SUPPORT" = "x1"])
|
|||||||
AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT],$CUDA_SUPPORT,
|
AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT],$CUDA_SUPPORT,
|
||||||
[Whether we want cuda device pointer support])
|
[Whether we want cuda device pointer support])
|
||||||
|
|
||||||
AM_CONDITIONAL([OPAL_cuda_support_41], [test "x$CUDA_SUPPORT_41" = "x1"])
|
|
||||||
AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT_41],$CUDA_SUPPORT_41,
|
|
||||||
[Whether we have CUDA 4.1 support available])
|
|
||||||
|
|
||||||
AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
|
AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
|
||||||
AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
|
AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
|
||||||
[Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])
|
[Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||||
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
* Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -50,7 +50,6 @@ int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq,
|
|||||||
mca_bml_base_btl_t* bml_btl,
|
mca_bml_base_btl_t* bml_btl,
|
||||||
size_t size) {
|
size_t size) {
|
||||||
int rc;
|
int rc;
|
||||||
#if OPAL_CUDA_SUPPORT_41
|
|
||||||
sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA;
|
sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA;
|
||||||
if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {
|
if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {
|
||||||
unsigned char *base;
|
unsigned char *base;
|
||||||
@ -81,10 +80,6 @@ int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq,
|
|||||||
sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA;
|
sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA;
|
||||||
rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0);
|
rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0);
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
/* Just do the rendezvous but set initial data to be sent to zero */
|
|
||||||
rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0);
|
|
||||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,7 +56,6 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq,
|
|||||||
mca_bml_base_btl_t* bml_btl,
|
mca_bml_base_btl_t* bml_btl,
|
||||||
size_t size) {
|
size_t size) {
|
||||||
int rc;
|
int rc;
|
||||||
#if OPAL_CUDA_SUPPORT_41
|
|
||||||
#if OPAL_CUDA_GDR_SUPPORT
|
#if OPAL_CUDA_GDR_SUPPORT
|
||||||
/* With some BTLs, switch to RNDV from RGET at large messages */
|
/* With some BTLs, switch to RNDV from RGET at large messages */
|
||||||
if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) &&
|
if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) &&
|
||||||
@ -95,10 +94,6 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq,
|
|||||||
sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA;
|
sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA;
|
||||||
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0);
|
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0);
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
/* Just do the rendezvous but set initial data to be sent to zero */
|
|
||||||
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0);
|
|
||||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,13 +88,11 @@ struct cudaFunctionTable {
|
|||||||
int (*cuEventDestroy)(CUevent);
|
int (*cuEventDestroy)(CUevent);
|
||||||
int (*cuStreamWaitEvent)(CUstream, CUevent, unsigned int);
|
int (*cuStreamWaitEvent)(CUstream, CUevent, unsigned int);
|
||||||
int (*cuMemGetAddressRange)(CUdeviceptr*, size_t*, CUdeviceptr);
|
int (*cuMemGetAddressRange)(CUdeviceptr*, size_t*, CUdeviceptr);
|
||||||
#if OPAL_CUDA_SUPPORT_41
|
|
||||||
int (*cuIpcGetEventHandle)(CUipcEventHandle*, CUevent);
|
int (*cuIpcGetEventHandle)(CUipcEventHandle*, CUevent);
|
||||||
int (*cuIpcOpenEventHandle)(CUevent*, CUipcEventHandle);
|
int (*cuIpcOpenEventHandle)(CUevent*, CUipcEventHandle);
|
||||||
int (*cuIpcOpenMemHandle)(CUdeviceptr*, CUipcMemHandle, unsigned int);
|
int (*cuIpcOpenMemHandle)(CUdeviceptr*, CUipcMemHandle, unsigned int);
|
||||||
int (*cuIpcCloseMemHandle)(CUdeviceptr);
|
int (*cuIpcCloseMemHandle)(CUdeviceptr);
|
||||||
int (*cuIpcGetMemHandle)(CUipcMemHandle*, CUdeviceptr);
|
int (*cuIpcGetMemHandle)(CUipcMemHandle*, CUdeviceptr);
|
||||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
|
||||||
int (*cuCtxGetDevice)(CUdevice *);
|
int (*cuCtxGetDevice)(CUdevice *);
|
||||||
int (*cuDeviceCanAccessPeer)(int *, CUdevice, CUdevice);
|
int (*cuDeviceCanAccessPeer)(int *, CUdevice, CUdevice);
|
||||||
int (*cuDeviceGet)(CUdevice *, int);
|
int (*cuDeviceGet)(CUdevice *, int);
|
||||||
@ -156,7 +154,6 @@ OBJ_CLASS_INSTANCE(common_cuda_mem_regs_t,
|
|||||||
NULL,
|
NULL,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
#if OPAL_CUDA_SUPPORT_41
|
|
||||||
static int mca_common_cuda_async = 1;
|
static int mca_common_cuda_async = 1;
|
||||||
static int mca_common_cuda_cumemcpy_async;
|
static int mca_common_cuda_cumemcpy_async;
|
||||||
#if OPAL_ENABLE_DEBUG
|
#if OPAL_ENABLE_DEBUG
|
||||||
@ -223,8 +220,6 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ;
|
|||||||
#define CUDA_DUMP_EVTHANDLE(a)
|
#define CUDA_DUMP_EVTHANDLE(a)
|
||||||
#endif /* OPAL_ENABLE_DEBUG */
|
#endif /* OPAL_ENABLE_DEBUG */
|
||||||
|
|
||||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
|
||||||
|
|
||||||
/* This is a separate function so we can see these variables with ompi_info and
|
/* This is a separate function so we can see these variables with ompi_info and
|
||||||
* also set them with the tools interface */
|
* also set them with the tools interface */
|
||||||
void mca_common_cuda_register_mca_variables(void)
|
void mca_common_cuda_register_mca_variables(void)
|
||||||
@ -263,7 +258,6 @@ void mca_common_cuda_register_mca_variables(void)
|
|||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
&mca_common_cuda_warning);
|
&mca_common_cuda_warning);
|
||||||
|
|
||||||
#if OPAL_CUDA_SUPPORT_41
|
|
||||||
/* Use this flag to test async vs sync copies */
|
/* Use this flag to test async vs sync copies */
|
||||||
mca_common_cuda_async = 1;
|
mca_common_cuda_async = 1;
|
||||||
(void) mca_base_var_register("ompi", "mpi", "common_cuda", "memcpy_async",
|
(void) mca_base_var_register("ompi", "mpi", "common_cuda", "memcpy_async",
|
||||||
@ -280,7 +274,6 @@ void mca_common_cuda_register_mca_variables(void)
|
|||||||
OPAL_INFO_LVL_9,
|
OPAL_INFO_LVL_9,
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
&cuda_event_max);
|
&cuda_event_max);
|
||||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
|
||||||
|
|
||||||
/* Use this flag to test cuMemcpyAsync vs cuMemcpy */
|
/* Use this flag to test cuMemcpyAsync vs cuMemcpy */
|
||||||
mca_common_cuda_cumemcpy_async = 1;
|
mca_common_cuda_cumemcpy_async = 1;
|
||||||
@ -465,13 +458,11 @@ int mca_common_cuda_stage_one_init(void)
|
|||||||
OPAL_CUDA_DLSYM(libcuda_handle, cuMemFree);
|
OPAL_CUDA_DLSYM(libcuda_handle, cuMemFree);
|
||||||
OPAL_CUDA_DLSYM(libcuda_handle, cuMemAlloc);
|
OPAL_CUDA_DLSYM(libcuda_handle, cuMemAlloc);
|
||||||
OPAL_CUDA_DLSYM(libcuda_handle, cuMemGetAddressRange);
|
OPAL_CUDA_DLSYM(libcuda_handle, cuMemGetAddressRange);
|
||||||
#if OPAL_CUDA_SUPPORT_41
|
|
||||||
OPAL_CUDA_DLSYM(libcuda_handle, cuIpcGetEventHandle);
|
OPAL_CUDA_DLSYM(libcuda_handle, cuIpcGetEventHandle);
|
||||||
OPAL_CUDA_DLSYM(libcuda_handle, cuIpcOpenEventHandle);
|
OPAL_CUDA_DLSYM(libcuda_handle, cuIpcOpenEventHandle);
|
||||||
OPAL_CUDA_DLSYM(libcuda_handle, cuIpcOpenMemHandle);
|
OPAL_CUDA_DLSYM(libcuda_handle, cuIpcOpenMemHandle);
|
||||||
OPAL_CUDA_DLSYM(libcuda_handle, cuIpcCloseMemHandle);
|
OPAL_CUDA_DLSYM(libcuda_handle, cuIpcCloseMemHandle);
|
||||||
OPAL_CUDA_DLSYM(libcuda_handle, cuIpcGetMemHandle);
|
OPAL_CUDA_DLSYM(libcuda_handle, cuIpcGetMemHandle);
|
||||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
|
||||||
OPAL_CUDA_DLSYM(libcuda_handle, cuCtxGetDevice);
|
OPAL_CUDA_DLSYM(libcuda_handle, cuCtxGetDevice);
|
||||||
OPAL_CUDA_DLSYM(libcuda_handle, cuDeviceCanAccessPeer);
|
OPAL_CUDA_DLSYM(libcuda_handle, cuDeviceCanAccessPeer);
|
||||||
OPAL_CUDA_DLSYM(libcuda_handle, cuDeviceGet);
|
OPAL_CUDA_DLSYM(libcuda_handle, cuDeviceGet);
|
||||||
@ -595,7 +586,6 @@ static int mca_common_cuda_stage_three_init(void)
|
|||||||
return OPAL_ERROR;
|
return OPAL_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if OPAL_CUDA_SUPPORT_41
|
|
||||||
if (true == mca_common_cuda_enabled) {
|
if (true == mca_common_cuda_enabled) {
|
||||||
/* Set up an array to store outstanding IPC async copy events */
|
/* Set up an array to store outstanding IPC async copy events */
|
||||||
cuda_event_ipc_num_used = 0;
|
cuda_event_ipc_num_used = 0;
|
||||||
@ -633,7 +623,6 @@ static int mca_common_cuda_stage_three_init(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
|
||||||
if (true == mca_common_cuda_enabled) {
|
if (true == mca_common_cuda_enabled) {
|
||||||
/* Set up an array to store outstanding async dtoh events. Used on the
|
/* Set up an array to store outstanding async dtoh events. Used on the
|
||||||
* sending side for asynchronous copies. */
|
* sending side for asynchronous copies. */
|
||||||
@ -1006,7 +995,6 @@ void mca_common_cuda_unregister(void *ptr, char *msg) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if OPAL_CUDA_SUPPORT_41
|
|
||||||
/*
|
/*
|
||||||
* Get the memory handle of a local section of memory that can be sent
|
* Get the memory handle of a local section of memory that can be sent
|
||||||
* to the remote side so it can access the memory. This is the
|
* to the remote side so it can access the memory. This is the
|
||||||
@ -1739,8 +1727,6 @@ static float mydifftime(opal_timer_t ts_start, opal_timer_t ts_end) {
|
|||||||
}
|
}
|
||||||
#endif /* OPAL_ENABLE_DEBUG */
|
#endif /* OPAL_ENABLE_DEBUG */
|
||||||
|
|
||||||
#endif /* OPAL_CUDA_SUPPORT_41 */
|
|
||||||
|
|
||||||
/* Routines that get plugged into the opal datatype code */
|
/* Routines that get plugged into the opal datatype code */
|
||||||
static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t *convertor)
|
static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t *convertor)
|
||||||
{
|
{
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user