From f2ff6e03ab90709f5c718ca759dfa84ca8c2ba73 Mon Sep 17 00:00:00 2001
From: Rolf vandeVaart
Date: Thu, 29 Oct 2015 11:24:02 -0400
Subject: [PATCH] Make CUDA 4.1 a requirement for CUDA-aware support.

Remove all related preprocessor conditionals.
---
 config/opal_check_cuda.m4          | 17 ++++++++---------
 ompi/mca/pml/bfo/pml_bfo_cuda.c    |  7 +------
 ompi/mca/pml/ob1/pml_ob1_cuda.c    |  5 -----
 opal/mca/common/cuda/common_cuda.c | 14 --------------
 4 files changed, 9 insertions(+), 34 deletions(-)

diff --git a/config/opal_check_cuda.m4 b/config/opal_check_cuda.m4
index 7040f5c515..11456ef905 100644
--- a/config/opal_check_cuda.m4
+++ b/config/opal_check_cuda.m4
@@ -16,7 +16,7 @@ dnl Copyright (c) 2009 IBM Corporation. All rights reserved.
 dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights
 dnl                    reserved.
 dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved.
-dnl Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved.
+dnl Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved.
 dnl Copyright (c) 2015 Research Organization for Information Science
 dnl                    and Technology (RIST). All rights reserved.
 dnl
@@ -79,10 +79,13 @@ dnl common framework, and likely configured first). So we have to
 dnl defer this check until later (see the OPAL_CHECK_CUDA_AFTER_OPAL_DL m4
 dnl macro, below). :-(
 
-# If we have CUDA support, check to see if we have CUDA 4.1 support
-AS_IF([test "$opal_check_cuda_happy"="yes"],
-    AC_CHECK_MEMBER([struct CUipcMemHandle_st.reserved], [CUDA_SUPPORT_41=1], [CUDA_SUPPORT_41=0],
-        [#include <$opal_cuda_incdir/cuda.h>]),
+# We require CUDA IPC support which started in CUDA 4.1. Error
+# out if the support is not there.
+AS_IF([test "$opal_check_cuda_happy" = "yes"],
+      [AC_CHECK_MEMBER([struct CUipcMemHandle_st.reserved],
+          [],
+          [AC_MSG_ERROR([Cannot continue because CUDA 4.1 or later is required])],
+          [#include <$opal_cuda_incdir/cuda.h>])],
       [])
 
 # If we have CUDA support, check to see if we have support for SYNC_MEMOPS
@@ -125,10 +128,6 @@ AM_CONDITIONAL([OPAL_cuda_support], [test "x$CUDA_SUPPORT" = "x1"])
 AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT],$CUDA_SUPPORT,
                    [Whether we want cuda device pointer support])
 
-AM_CONDITIONAL([OPAL_cuda_support_41], [test "x$CUDA_SUPPORT_41" = "x1"])
-AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT_41],$CUDA_SUPPORT_41,
-                   [Whether we have CUDA 4.1 support available])
-
 AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
 AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
                    [Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])

diff --git a/ompi/mca/pml/bfo/pml_bfo_cuda.c b/ompi/mca/pml/bfo/pml_bfo_cuda.c
index 9c593cd691..eb35b226e0 100644
--- a/ompi/mca/pml/bfo/pml_bfo_cuda.c
+++ b/ompi/mca/pml/bfo/pml_bfo_cuda.c
@@ -11,7 +11,7 @@
  * All rights reserved.
  * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
  * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
+ * Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -50,7 +50,6 @@ int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq,
                                         mca_bml_base_btl_t* bml_btl,
                                         size_t size) {
     int rc;
-#if OPAL_CUDA_SUPPORT_41
     sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA;
     if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {
         unsigned char *base;
@@ -81,10 +80,6 @@ int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq,
         sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA;
         rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0);
     }
-#else
-    /* Just do the rendezvous but set initial data to be sent to zero */
-    rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0);
-#endif /* OPAL_CUDA_SUPPORT_41 */
     return rc;
 }
 
diff --git a/ompi/mca/pml/ob1/pml_ob1_cuda.c b/ompi/mca/pml/ob1/pml_ob1_cuda.c
index a44a8b377c..12ad396363 100644
--- a/ompi/mca/pml/ob1/pml_ob1_cuda.c
+++ b/ompi/mca/pml/ob1/pml_ob1_cuda.c
@@ -56,7 +56,6 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq,
                                         mca_bml_base_btl_t* bml_btl,
                                         size_t size) {
     int rc;
-#if OPAL_CUDA_SUPPORT_41
 #if OPAL_CUDA_GDR_SUPPORT
     /* With some BTLs, switch to RNDV from RGET at large messages */
     if ((sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) &&
@@ -95,10 +94,6 @@ int mca_pml_ob1_send_request_start_cuda(mca_pml_ob1_send_request_t* sendreq,
         sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA;
         rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0);
     }
-#else
-    /* Just do the rendezvous but set initial data to be sent to zero */
-    rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, 0, 0);
-#endif /* OPAL_CUDA_SUPPORT_41 */
     return rc;
 }
 
diff --git a/opal/mca/common/cuda/common_cuda.c b/opal/mca/common/cuda/common_cuda.c
index bf966747a5..11500b6668 100644
--- a/opal/mca/common/cuda/common_cuda.c
+++ b/opal/mca/common/cuda/common_cuda.c
@@ -88,13 +88,11 @@ struct cudaFunctionTable {
     int (*cuEventDestroy)(CUevent);
     int (*cuStreamWaitEvent)(CUstream, CUevent, unsigned int);
     int (*cuMemGetAddressRange)(CUdeviceptr*, size_t*, CUdeviceptr);
-#if OPAL_CUDA_SUPPORT_41
     int (*cuIpcGetEventHandle)(CUipcEventHandle*, CUevent);
    int (*cuIpcOpenEventHandle)(CUevent*, CUipcEventHandle);
     int (*cuIpcOpenMemHandle)(CUdeviceptr*, CUipcMemHandle, unsigned int);
     int (*cuIpcCloseMemHandle)(CUdeviceptr);
     int (*cuIpcGetMemHandle)(CUipcMemHandle*, CUdeviceptr);
-#endif /* OPAL_CUDA_SUPPORT_41 */
     int (*cuCtxGetDevice)(CUdevice *);
     int (*cuDeviceCanAccessPeer)(int *, CUdevice, CUdevice);
     int (*cuDeviceGet)(CUdevice *, int);
@@ -156,7 +154,6 @@ OBJ_CLASS_INSTANCE(common_cuda_mem_regs_t,
                    NULL,
                    NULL);
 
-#if OPAL_CUDA_SUPPORT_41
 static int mca_common_cuda_async = 1;
 static int mca_common_cuda_cumemcpy_async;
 #if OPAL_ENABLE_DEBUG
@@ -223,8 +220,6 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ;
 #define CUDA_DUMP_EVTHANDLE(a)
 #endif /* OPAL_ENABLE_DEBUG */
 
-#endif /* OPAL_CUDA_SUPPORT_41 */
-
 /* This is a seperate function so we can see these variables with ompi_info and
  * also set them with the tools interface */
 void mca_common_cuda_register_mca_variables(void)
@@ -263,7 +258,6 @@ void mca_common_cuda_register_mca_variables(void)
                                  MCA_BASE_VAR_SCOPE_READONLY,
                                  &mca_common_cuda_warning);
 
-#if OPAL_CUDA_SUPPORT_41
     /* Use this flag to test async vs sync copies */
     mca_common_cuda_async = 1;
     (void) mca_base_var_register("ompi", "mpi", "common_cuda", "memcpy_async",
@@ -280,7 +274,6 @@ void mca_common_cuda_register_mca_variables(void)
                                  OPAL_INFO_LVL_9,
                                  MCA_BASE_VAR_SCOPE_READONLY,
                                  &cuda_event_max);
-#endif /* OPAL_CUDA_SUPPORT_41 */
 
     /* Use this flag to test cuMemcpyAsync vs cuMemcpy */
     mca_common_cuda_cumemcpy_async = 1;
@@ -465,13 +458,11 @@ int mca_common_cuda_stage_one_init(void)
     OPAL_CUDA_DLSYM(libcuda_handle, cuMemFree);
     OPAL_CUDA_DLSYM(libcuda_handle, cuMemAlloc);
     OPAL_CUDA_DLSYM(libcuda_handle, cuMemGetAddressRange);
-#if OPAL_CUDA_SUPPORT_41
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcGetEventHandle);
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcOpenEventHandle);
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcOpenMemHandle);
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcCloseMemHandle);
     OPAL_CUDA_DLSYM(libcuda_handle, cuIpcGetMemHandle);
-#endif /* OPAL_CUDA_SUPPORT_41 */
     OPAL_CUDA_DLSYM(libcuda_handle, cuCtxGetDevice);
     OPAL_CUDA_DLSYM(libcuda_handle, cuDeviceCanAccessPeer);
     OPAL_CUDA_DLSYM(libcuda_handle, cuDeviceGet);
@@ -595,7 +586,6 @@ static int mca_common_cuda_stage_three_init(void)
         return OPAL_ERROR;
     }
 
-#if OPAL_CUDA_SUPPORT_41
     if (true == mca_common_cuda_enabled) {
         /* Set up an array to store outstanding IPC async copy events */
         cuda_event_ipc_num_used = 0;
@@ -633,7 +623,6 @@ static int mca_common_cuda_stage_three_init(void)
         }
     }
-#endif /* OPAL_CUDA_SUPPORT_41 */
 
     if (true == mca_common_cuda_enabled) {
         /* Set up an array to store outstanding async dtoh events. Used on the
          * sending side for asynchronous copies. */
@@ -1006,7 +995,6 @@ void mca_common_cuda_unregister(void *ptr, char *msg) {
     }
 }
 
-#if OPAL_CUDA_SUPPORT_41
 /*
  * Get the memory handle of a local section of memory that can be sent
  * to the remote size so it can access the memory. This is the
@@ -1739,8 +1727,6 @@ static float mydifftime(opal_timer_t ts_start, opal_timer_t ts_end) {
 }
 #endif /* OPAL_ENABLE_DEBUG */
 
-#endif /* OPAL_CUDA_SUPPORT_41 */
-
 /* Routines that get plugged into the opal datatype code */
 static int mca_common_cuda_is_gpu_buffer(const void *pUserBuf, opal_convertor_t *convertor)
 {
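Note (not part of the patch): the configure change above makes the CUDA IPC API, introduced in CUDA 4.1, mandatory, and the cuIpc* entries in common_cuda.c are now compiled and loaded unconditionally. As background, here is a minimal standalone sketch of what that API does: a parent process exports a device allocation with cuIpcGetMemHandle and a forked child maps it with cuIpcOpenMemHandle. This is an illustration, not Open MPI code; the file name, the CHECK macro, and the pipe-based handle exchange are invented for the example, and it assumes a Linux system with a device that supports CUDA IPC.

/* ipc_sketch.c: export a device buffer from a parent process and import it
 * in a forked child, passing the CUipcMemHandle through a pipe.
 * Build (illustrative): gcc ipc_sketch.c -lcuda -o ipc_sketch
 */
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

#define CHECK(call)                                              \
    do {                                                         \
        CUresult err = (call);                                   \
        if (CUDA_SUCCESS != err) {                               \
            fprintf(stderr, "%s failed: %d\n", #call, (int)err); \
            exit(1);                                             \
        }                                                        \
    } while (0)

int main(void)
{
    int fds[2];
    if (0 != pipe(fds)) { perror("pipe"); return 1; }

    pid_t pid = fork();
    if (pid < 0) { perror("fork"); return 1; }

    if (0 == pid) {                      /* child: importer */
        CUdevice dev; CUcontext ctx; CUdeviceptr remote;
        CUipcMemHandle handle;
        int value = 0;

        close(fds[1]);
        /* Receive the opaque handle from the parent. */
        if (read(fds[0], &handle, sizeof(handle)) != sizeof(handle)) return 1;

        CHECK(cuInit(0));
        CHECK(cuDeviceGet(&dev, 0));
        CHECK(cuCtxCreate(&ctx, 0, dev));

        /* Map the parent's allocation into this process (CUDA >= 4.1). */
        CHECK(cuIpcOpenMemHandle(&remote, handle, CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS));
        CHECK(cuMemcpyDtoH(&value, remote, sizeof(value)));
        printf("child read %d from the parent's GPU buffer\n", value);
        CHECK(cuIpcCloseMemHandle(remote));
        return 0;
    }

    /* parent: exporter */
    CUdevice dev; CUcontext ctx; CUdeviceptr dbuf;
    CUipcMemHandle handle;
    int value = 42;

    close(fds[0]);
    CHECK(cuInit(0));
    CHECK(cuDeviceGet(&dev, 0));
    CHECK(cuCtxCreate(&ctx, 0, dev));
    CHECK(cuMemAlloc(&dbuf, sizeof(value)));
    CHECK(cuMemcpyHtoD(dbuf, &value, sizeof(value)));

    /* Export the allocation; CUipcMemHandle is the opaque struct whose
     * .reserved member the AC_CHECK_MEMBER probe in the m4 change tests. */
    CHECK(cuIpcGetMemHandle(&handle, dbuf));
    if (write(fds[1], &handle, sizeof(handle)) != sizeof(handle)) return 1;

    waitpid(pid, NULL, 0);               /* keep dbuf alive until the child is done */
    CHECK(cuMemFree(dbuf));
    return 0;
}

The handle pushed through the pipe here plays the same role as the one Open MPI sends to a peer rank over its own control channel before the peer calls cuIpcOpenMemHandle.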
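Also for background (again, not part of the patch): mca_common_cuda_stage_one_init() resolves the cuIpc* entry points at run time via OPAL_CUDA_DLSYM, so after this change a missing symbol surfaces as an initialization failure rather than a compiled-out code path. The sketch below is a simplified stand-in for that pattern using plain dlopen/dlsym instead of Open MPI's opal_dl wrapper; the library name libcuda.so.1 and the my_ prefix are assumptions made for the example.

/* dlsym_sketch.c: resolve one of the now-mandatory IPC symbols at run time.
 * Build (illustrative): gcc dlsym_sketch.c -ldl -o dlsym_sketch
 */
#include <cuda.h>
#include <dlfcn.h>
#include <stdio.h>

/* Function-pointer slot, in the spirit of Open MPI's cudaFunctionTable. */
static CUresult (*my_cuIpcGetMemHandle)(CUipcMemHandle *, CUdeviceptr);

int main(void)
{
    void *handle = dlopen("libcuda.so.1", RTLD_LAZY);
    if (NULL == handle) {
        fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return 1;
    }

    /* With CUDA 4.1 required at configure time, a driver library that
     * lacks this symbol is treated as an error, not a fallback case. */
    my_cuIpcGetMemHandle = (CUresult (*)(CUipcMemHandle *, CUdeviceptr))
        dlsym(handle, "cuIpcGetMemHandle");
    if (NULL == my_cuIpcGetMemHandle) {
        fprintf(stderr, "cuIpcGetMemHandle not found (driver older than CUDA 4.1?)\n");
        dlclose(handle);
        return 1;
    }

    printf("cuIpcGetMemHandle resolved successfully\n");
    dlclose(handle);
    return 0;
}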