
Make CUDA 4.1 a requirement for CUDA-aware support.

Remove all related preprocessor conditionals.
This commit is contained in:
Rolf vandeVaart 2015-10-29 11:24:02 -04:00
parent 8ad9b450c4
commit f2ff6e03ab
4 changed files with 9 additions and 34 deletions

View file

@@ -16,7 +16,7 @@ dnl Copyright (c) 2009      IBM Corporation.  All rights reserved.
 dnl Copyright (c) 2009      Los Alamos National Security, LLC.  All rights
 dnl                         reserved.
 dnl Copyright (c) 2009-2011 Oak Ridge National Labs.  All rights reserved.
-dnl Copyright (c) 2011-2014 NVIDIA Corporation.  All rights reserved.
+dnl Copyright (c) 2011-2015 NVIDIA Corporation.  All rights reserved.
 dnl Copyright (c) 2015      Research Organization for Information Science
 dnl                         and Technology (RIST). All rights reserved.
 dnl
@@ -79,10 +79,13 @@ dnl common framework, and likely configured first).  So we have to
 dnl defer this check until later (see the OPAL_CHECK_CUDA_AFTER_OPAL_DL m4
 dnl macro, below).  :-(
-# If we have CUDA support, check to see if we have CUDA 4.1 support
-AS_IF([test "$opal_check_cuda_happy"="yes"],
-    AC_CHECK_MEMBER([struct CUipcMemHandle_st.reserved], [CUDA_SUPPORT_41=1], [CUDA_SUPPORT_41=0],
-        [#include <$opal_cuda_incdir/cuda.h>]),
+# We require CUDA IPC support which started in CUDA 4.1. Error
+# out if the support is not there.
+AS_IF([test "$opal_check_cuda_happy" = "yes"],
+      [AC_CHECK_MEMBER([struct CUipcMemHandle_st.reserved],
+                       [],
+                       [AC_MSG_ERROR([Cannot continue because CUDA 4.1 or later is required])],
+                       [#include <$opal_cuda_incdir/cuda.h>])],
       [])

 # If we have CUDA support, check to see if we have support for SYNC_MEMOPS
@@ -125,10 +128,6 @@ AM_CONDITIONAL([OPAL_cuda_support], [test "x$CUDA_SUPPORT" = "x1"])
 AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT],$CUDA_SUPPORT,
                    [Whether we want cuda device pointer support])

-AM_CONDITIONAL([OPAL_cuda_support_41], [test "x$CUDA_SUPPORT_41" = "x1"])
-AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT_41],$CUDA_SUPPORT_41,
-                   [Whether we have CUDA 4.1 support available])
-
 AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
 AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
                    [Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])
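The check above is what now gates CUDA-aware builds: AC_CHECK_MEMBER compiles a small probe against cuda.h and, if the CUipcMemHandle_st member is missing (i.e. the headers predate CUDA 4.1), configure aborts via AC_MSG_ERROR instead of silently setting CUDA_SUPPORT_41=0. As a rough, hypothetical sketch (not the exact test program autoconf emits; the real include path comes from $opal_cuda_incdir), the probe amounts to something like:

/* Hypothetical equivalent of the AC_CHECK_MEMBER probe: this compiles
 * only when cuda.h declares struct CUipcMemHandle_st with a "reserved"
 * member, which first appeared in CUDA 4.1. */
#include <cuda.h>

int main(void)
{
    static struct CUipcMemHandle_st probe;
    /* Only the member's existence matters; no CUDA calls are made. */
    (void) sizeof(probe.reserved);
    return 0;
}

If this fails to compile, configure now stops with "Cannot continue because CUDA 4.1 or later is required".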

View file

@@ -11,7 +11,7 @@
  *                         All rights reserved.
  * Copyright (c) 2008      UT-Battelle, LLC.  All rights reserved.
  * Copyright (c) 2010-2012 Oracle and/or its affiliates.  All rights reserved.
- * Copyright (c) 2012      NVIDIA Corporation.  All rights reserved.
+ * Copyright (c) 2012-2015 NVIDIA Corporation.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -50,7 +50,6 @@ int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq,
                                         mca_bml_base_btl_t* bml_btl,
                                         size_t size) {
     int rc;
-#if OPAL_CUDA_SUPPORT_41
     sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA;
     if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {
         unsigned char *base;
@@ -81,10 +80,6 @@ int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq,
         sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA;
         rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0);
     }
-#else
-    /* Just do the rendezvous but set initial data to be sent to zero */
-    rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0);
-#endif /* OPAL_CUDA_SUPPORT_41 */
     return rc;
 }

View file

@@ -56,7 +56,6 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq,
                                         mca_bml_base_btl_t* bml_btl,
                                         size_t size) {
     int rc;
-#if OPAL_CUDA_SUPPORT_41
 #if OPAL_CUDA_GDR_SUPPORT
     /* With some BTLs, switch to RNDV from RGET at large messages */
     if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) &&
@@ -95,10 +94,6 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq,
         sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA;
         rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0);
     }
-#else
-    /* Just do the rendezvous but set initial data to be sent to zero */
-    rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0);
-#endif /* OPAL_CUDA_SUPPORT_41 */
     return rc;
 }

View file

@@ -88,13 +88,11 @@ struct cudaFunctionTable {
     int (*cuEventDestroy)(CUevent);
     int (*cuStreamWaitEvent)(CUstream, CUevent, unsigned int);
     int (*cuMemGetAddressRange)(CUdeviceptr*, size_t*, CUdeviceptr);
-#if OPAL_CUDA_SUPPORT_41
     int (*cuIpcGetEventHandle)(CUipcEventHandle*, CUevent);
     int (*cuIpcOpenEventHandle)(CUevent*, CUipcEventHandle);
     int (*cuIpcOpenMemHandle)(CUdeviceptr*, CUipcMemHandle, unsigned int);
     int (*cuIpcCloseMemHandle)(CUdeviceptr);
     int (*cuIpcGetMemHandle)(CUipcMemHandle*, CUdeviceptr);
-#endif /* OPAL_CUDA_SUPPORT_41 */
     int (*cuCtxGetDevice)(CUdevice *);
     int (*cuDeviceCanAccessPeer)(int *, CUdevice, CUdevice);
     int (*cuDeviceGet)(CUdevice *, int);
@@ -156,7 +154,6 @@ OBJ_CLASS_INSTANCE(common_cuda_mem_regs_t,
                    NULL,
                    NULL);

-#if OPAL_CUDA_SUPPORT_41
 static int mca_common_cuda_async = 1;
 static int mca_common_cuda_cumemcpy_async;
 #if OPAL_ENABLE_DEBUG
@@ -223,8 +220,6 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ;
 #define CUDA_DUMP_EVTHANDLE(a)
 #endif /* OPAL_ENABLE_DEBUG */

-#endif /* OPAL_CUDA_SUPPORT_41 */
-
 /* This is a seperate function so we can see these variables with ompi_info and
  * also set them with the tools interface */
 void mca_common_cuda_register_mca_variables(void)
@@ -263,7 +258,6 @@ void mca_common_cuda_register_mca_variables(void)
                                  MCA_BASE_VAR_SCOPE_READONLY,
                                  &mca_common_cuda_warning);

-#if OPAL_CUDA_SUPPORT_41
     /* Use this flag to test async vs sync copies */
     mca_common_cuda_async = 1;
     (void) mca_base_var_register("ompi", "mpi", "common_cuda", "memcpy_async",
@@ -280,7 +274,6 @@ void mca_common_cuda_register_mca_variables(void)
                                  OPAL_INFO_LVL_9,
                                  MCA_BASE_VAR_SCOPE_READONLY,
                                  &cuda_event_max);
-#endif /* OPAL_CUDA_SUPPORT_41 */

     /* Use this flag to test cuMemcpyAsync vs cuMemcpy */
     mca_common_cuda_cumemcpy_async = 1;
@@ -465,13 +458,11 @@ int mca_common_cuda_stage_one_init(void)
     OPAL_CUDA_DLSYM(libcuda_handle, cuMemFree);
     OPAL_CUDA_DLSYM(libcuda_handle, cuMemAlloc);
     OPAL_CUDA_DLSYM(libcuda_handle, cuMemGetAddressRange);
-#if OPAL_CUDA_SUPPORT_41
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcGetEventHandle);
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcOpenEventHandle);
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcOpenMemHandle);
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcCloseMemHandle);
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcGetMemHandle);
-#endif /* OPAL_CUDA_SUPPORT_41 */
     OPAL_CUDA_DLSYM(libcuda_handle, cuCtxGetDevice);
     OPAL_CUDA_DLSYM(libcuda_handle, cuDeviceCanAccessPeer);
     OPAL_CUDA_DLSYM(libcuda_handle, cuDeviceGet);
@@ -595,7 +586,6 @@ static int mca_common_cuda_stage_three_init(void)
         return OPAL_ERROR;
     }

-#if OPAL_CUDA_SUPPORT_41
     if (true == mca_common_cuda_enabled) {
         /* Set up an array to store outstanding IPC async copy events */
         cuda_event_ipc_num_used = 0;
@@ -633,7 +623,6 @@ static int mca_common_cuda_stage_three_init(void)
         }
     }
-#endif /* OPAL_CUDA_SUPPORT_41 */

     if (true == mca_common_cuda_enabled) {
         /* Set up an array to store outstanding async dtoh events.  Used on the
          * sending side for asynchronous copies. */
@@ -1006,7 +995,6 @@ void mca_common_cuda_unregister(void *ptr, char *msg) {
     }
 }

-#if OPAL_CUDA_SUPPORT_41
 /*
  * Get the memory handle of a local section of memory that can be sent
  * to the remote size so it can access the memory.  This is the
@@ -1739,8 +1727,6 @@ static float mydifftime(opal_timer_t ts_start, opal_timer_t ts_end) {
 }
 #endif /* OPAL_ENABLE_DEBUG */

-#endif /* OPAL_CUDA_SUPPORT_41 */
-
 /* Routines that get plugged into the opal datatype code */
 static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t *convertor)
 {
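With the conditionals removed, the cuIpc* entries in cudaFunctionTable are always declared and always resolved through OPAL_CUDA_DLSYM during stage-one init. As a hedged, simplified sketch of that pattern (not the actual OPAL_CUDA_DLSYM macro, and with an intentionally simplified prototype rather than the one from cuda.h), resolving one of the IPC symbols from libcuda at runtime looks roughly like this; since CUDA 4.1 is now required, a missing IPC symbol indicates a driver that is too old and is treated as a hard failure:

/* Hypothetical sketch of dlsym-based resolution of a CUDA IPC symbol.
 * Build with -ldl; the function pointer type below is simplified. */
#include <dlfcn.h>
#include <stdio.h>

typedef int (*cuIpcGetMemHandle_fn)(void *mem_handle, unsigned long long dptr);

static cuIpcGetMemHandle_fn my_cuIpcGetMemHandle;

static int resolve_cuipc(void *libcuda_handle)
{
    my_cuIpcGetMemHandle =
        (cuIpcGetMemHandle_fn) dlsym(libcuda_handle, "cuIpcGetMemHandle");
    if (NULL == my_cuIpcGetMemHandle) {
        /* A libcuda without the IPC entry points predates CUDA 4.1,
         * which is now a fatal configuration. */
        fprintf(stderr, "cuIpcGetMemHandle not found: %s\n", dlerror());
        return -1;
    }
    return 0;
}

int main(void)
{
    void *h = dlopen("libcuda.so.1", RTLD_LAZY);
    if (NULL == h) {
        fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return 1;
    }
    return resolve_cuipc(h) == 0 ? 0 : 1;
}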