diff --git a/VERSION b/VERSION index daf0e897c0..87a9f53905 100644 --- a/VERSION +++ b/VERSION @@ -1,5 +1,6 @@ # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011 NVIDIA Corporation. All rights reserved. # This is the VERSION file for Open MPI, describing the precise # version of Open MPI in this distribution. The various components of @@ -102,4 +103,5 @@ libopen_pal_so_version=0:0:0 libmca_common_sm_so_version=0:0:0 libmca_common_mx_so_version=0:0:0 libmca_common_portals_so_version=0:0:0 +libmca_common_cuda_so_version=0:0:0 libmca_opal_common_hwloc_so_version=0:0:0 diff --git a/configure.ac b/configure.ac index 78f33903d5..85c2eb087b 100644 --- a/configure.ac +++ b/configure.ac @@ -15,6 +15,7 @@ # Copyright (c) 2006-2011 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. +# Copyright (c) 2011 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -129,6 +130,7 @@ m4_ifdef([project_ompi], # right now. AC_SUBST(libmca_common_sm_so_version) AC_SUBST(libmca_common_mx_so_version) + AC_SUBST(libmca_common_cuda_so_version) AC_SUBST(libmca_common_portals_so_version)]) m4_ifdef([project_orte], [AC_SUBST(libopen_rte_so_version)]) diff --git a/ompi/class/ompi_free_list.c b/ompi/class/ompi_free_list.c index 6793399f7d..d468a705be 100644 --- a/ompi/class/ompi_free_list.c +++ b/ompi/class/ompi_free_list.c @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2007 Mellanox Technologies. All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -202,7 +203,7 @@ int ompi_free_list_grow(ompi_free_list_t* flist, size_t num_elements) if(elem_size != 0) { mpool_alloc_ptr = (unsigned char *) flist->fl_mpool->mpool_alloc(flist->fl_mpool, num_elements * elem_size, flist->fl_payload_buffer_alignment, - MCA_MPOOL_FLAGS_CACHE_BYPASS, &reg); + MCA_MPOOL_FLAGS_CACHE_BYPASS | MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM, &reg); if(NULL == mpool_alloc_ptr) { free(alloc_ptr); return OMPI_ERR_TEMP_OUT_OF_RESOURCE; diff --git a/ompi/mca/btl/openib/Makefile.am b/ompi/mca/btl/openib/Makefile.am index 3a6c3b7cf6..36ae1f8e19 100644 --- a/ompi/mca/btl/openib/Makefile.am +++ b/ompi/mca/btl/openib/Makefile.am @@ -11,6 +11,7 @@ # All rights reserved. # Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -118,6 +119,10 @@ mcacomponent_LTLIBRARIES = $(component) mca_btl_openib_la_SOURCES = $(component_sources) mca_btl_openib_la_LDFLAGS = -module -avoid-version $(btl_openib_LDFLAGS) mca_btl_openib_la_LIBADD = $(btl_openib_LIBS) +if MCA_ompi_cuda_support +mca_btl_openib_la_LIBADD += \ + $(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la +endif noinst_LTLIBRARIES = $(lib) libmca_btl_openib_la_SOURCES = $(lib_sources) diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c index 84c8914ca5..385ab42e71 100644 --- a/ompi/mca/btl/openib/btl_openib_component.c +++ b/ompi/mca/btl/openib/btl_openib_component.c @@ -16,6 +16,7 @@ * reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2009-2011 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -70,6 +71,7 @@ const char *ibv_get_sysfs_path(void); #include "ompi/constants.h" #include "ompi/proc/proc.h" #include "ompi/mca/btl/btl.h" +#include "ompi/mca/common/cuda/common_cuda.h" #include "ompi/mca/mpool/base/base.h" #include "ompi/mca/mpool/rdma/mpool_rdma.h" #include "ompi/mca/btl/base/base.h" @@ -542,6 +544,13 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size, return OMPI_ERR_OUT_OF_RESOURCE; } +#if OMPI_CUDA_SUPPORT + if (reg->flags & MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM) { + mca_common_cuda_register(base, size, + openib_reg->base.mpool->mpool_component->mpool_version.mca_component_name); + } +#endif + return OMPI_SUCCESS; } @@ -555,6 +564,14 @@ static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg) __func__, strerror(errno))); return OMPI_ERROR; } + +#if OMPI_CUDA_SUPPORT + if (reg->flags & MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM) { + mca_common_cuda_unregister(openib_reg->base.base, + openib_reg->base.mpool->mpool_component->mpool_version.mca_component_name); + } +#endif + } openib_reg->mr = NULL; return OMPI_SUCCESS; diff --git a/ompi/mca/common/cuda/Makefile.am b/ompi/mca/common/cuda/Makefile.am new file mode 100644 index 0000000000..d0deab6197 --- /dev/null +++ b/ompi/mca/common/cuda/Makefile.am @@ -0,0 +1,110 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011 NVIDIA Corporation. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# A word of explanation... +# +# This library is linked against various MCA components because the +# support for CUDA device pointers is needed in various places. +# There are two cases: +# +# 1. libmca_common_cuda.la is a shared library. By linking that shared +# library to all components that need it, the OS linker will +# automatically load it into the process as necessary, and there will +# only be one copy (i.e., all the components will share *one* copy of +# the code and data). +# +# 2. libmca_common_cuda.la is a static library. In this case, it will +# be rolled up into the top-level libmpi.la. It will also be rolled +# into each component, but then the component will also be rolled up +# into the upper-level libmpi.la. Linkers universally know how to +# "figure this out" so that we end up with only one copy of the code +# and data. +# +# Note that building this common component statically and linking +# against other dynamic components is *not* supported! + +AM_CPPFLAGS = $(common_cuda_CPPFLAGS) + +# Header files + +headers = \ + common_cuda.h + +# Source files + +sources = \ + common_cuda.c + +dist_pkgdata_DATA = help-mpi-common-cuda.txt + +# As per above, we'll either have an installable or noinst result. +# The installable one should follow the same MCA prefix naming rules +# (i.e., libmca_<type>_<name>.la). The noinst one can be named +# whatever it wants, although libmca_<type>_<name>_noinst.la is +# recommended. + +# To simplify components that link to this library, we will *always* +# have an output libtool library named libmca_<type>_<name>.la -- even +# for case 2) described above (i.e., so there's no conditional logic +# necessary in component Makefile.am's that link to this library). +# Hence, if we're creating a noinst version of this library (i.e., +# case 2), we sym link it to the libmca_<type>_<name>.la name +# (libtool will do the Right Things under the covers). 
See the +# all-local and clean-local rules, below, for how this is effected. + +lib_LTLIBRARIES = +noinst_LTLIBRARIES = +comp_inst = libmca_common_cuda.la +comp_noinst = libmca_common_cuda_noinst.la + +if MCA_BUILD_ompi_common_cuda_DSO +lib_LTLIBRARIES += $(comp_inst) +else +noinst_LTLIBRARIES += $(comp_noinst) +endif + +libmca_common_cuda_la_SOURCES = $(headers) $(sources) +libmca_common_cuda_la_LDFLAGS = -version-info $(libmca_common_cuda_so_version) +libmca_common_cuda_la_LIBADD = $(common_cuda_LIBS) + +libmca_common_cuda_noinst_la_SOURCES = $(libmca_common_cuda_la_SOURCES) +libmca_common_cuda_noinst_la_LDFLAGS = $(common_cuda_LDFLAGS) +libmca_common_cuda_noinst_la_LIBADD = $(common_cuda_LIBS) + +# Conditionally install the header files + +if WANT_INSTALL_HEADERS +ompidir = $(includedir)/openmpi/$(subdir) +ompi_HEADERS = $(headers) +endif + +# These two rules will sym link the "noinst" libtool library filename +# to the installable libtool library filename in the case where we are +# compiling this component statically (case 2), described above). + +all-local: + if test -z "$(lib_LTLIBRARIES)"; then \ + rm -f "$(comp_inst)"; \ + $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \ + fi + +clean-local: + if test -z "$(lib_LTLIBRARIES)"; then \ + rm -f "$(comp_inst)"; \ + fi diff --git a/ompi/mca/common/cuda/common_cuda.c b/ompi/mca/common/cuda/common_cuda.c new file mode 100644 index 0000000000..6e8dfdefb1 --- /dev/null +++ b/ompi/mca/common/cuda/common_cuda.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. 
+ *                         All rights reserved.
+ * Copyright (c) 2011      NVIDIA Corporation.  All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include <errno.h>
+#include <unistd.h>
+#include <cuda.h>
+
+#include "opal/align.h"
+#include "opal/mca/base/mca_base_param.h"
+#include "opal/util/output.h"
+#include "orte/util/show_help.h"
+#include "common_cuda.h"
+
+/* File-scope state, written by mca_common_cuda_init() and read by the
+ * register/unregister entry points. */
+static bool initialized = false;
+static int mca_common_cuda_verbose;
+static int mca_common_cuda_output = 0;
+static bool mca_common_cuda_enabled = false;
+static bool mca_common_cuda_register_memory = true;
+static bool mca_common_cuda_warning = true;
+
+/**
+ * One-time initialization of the common CUDA support.
+ *
+ * Registers the MCA parameters mpi_common_cuda_verbose,
+ * mpi_common_cuda_register_memory and mpi_common_cuda_warning, opens an
+ * output stream at the requested verbosity, and then checks whether the
+ * calling thread has a current CUDA context.  Without a usable context,
+ * CUDA support and memory registration are both switched off.
+ *
+ * Calling this function again after it has completed is a no-op.
+ */
+void mca_common_cuda_init(void)
+{
+    int value;
+    CUresult res;
+    CUcontext cuContext = NULL;
+    bool have_context = false;
+
+    if (initialized) {
+        return;
+    }
+
+    /* Set different levels of verbosity in the cuda related code. */
+    (void) mca_base_param_reg_int_name("mpi", "common_cuda_verbose",
+                                       "Set level of common cuda verbosity",
+                                       false, false, 0, &mca_common_cuda_verbose);
+    mca_common_cuda_output = opal_output_open(NULL);
+    opal_output_set_verbosity(mca_common_cuda_output, mca_common_cuda_verbose);
+
+    /* Control whether system buffers get CUDA pinned or not.  Allows for
+     * performance analysis. */
+    (void) mca_base_param_reg_int_name("mpi", "common_cuda_register_memory",
+                                       "Whether to cuMemHostRegister preallocated BTL buffers",
+                                       false, false,
+                                       (int) mca_common_cuda_register_memory, &value);
+    mca_common_cuda_register_memory = OPAL_INT_TO_BOOL(value);
+
+    /* Control whether we see warnings when CUDA memory registration fails.  This is
+     * useful when CUDA support is configured in, but we are running a regular MPI
+     * application without CUDA. */
+    (void) mca_base_param_reg_int_name("mpi", "common_cuda_warning",
+                                       "Whether to print warnings when CUDA registration fails",
+                                       false, false,
+                                       (int) mca_common_cuda_warning, &value);
+    mca_common_cuda_warning = OPAL_INT_TO_BOOL(value);
+
+    /* Check to see if this process is running in a CUDA context.  If
+     * so, all is good.  If not, then disable CUDA support.
+     *
+     * cuCtxGetCurrent() reports CUDA_SUCCESS and stores NULL when no
+     * context is bound to the calling thread, so the returned handle has
+     * to be checked as well as the return code. */
+    res = cuCtxGetCurrent(&cuContext);
+    if (CUDA_SUCCESS != res) {
+        if (mca_common_cuda_warning) {
+            orte_show_help("help-mpi-common-cuda.txt", "cuCtxGetCurrent failed",
+                           true, (int) res);
+        }
+    } else if (NULL == cuContext) {
+        if (mca_common_cuda_warning) {
+            opal_output(0, "CUDA: cuCtxGetCurrent succeeded but no CUDA context "
+                        "is bound to this thread; support for CUDA registered "
+                        "memory is disabled (set the MCA parameter "
+                        "mpi_common_cuda_warning to 0 to silence this warning)");
+        }
+    } else {
+        have_context = true;
+    }
+
+    if (!have_context) {
+        mca_common_cuda_enabled = false;
+        mca_common_cuda_register_memory = false;
+        initialized = true;
+        return;
+    }
+
+    mca_common_cuda_enabled = true;
+    opal_output_verbose(20, mca_common_cuda_output,
+                        "CUDA: cuCtxGetCurrent succeeded");
+
+    opal_output_verbose(30, mca_common_cuda_output,
+                        "CUDA: initialized");
+    initialized = true;
+}
+
+
+/**
+ * Call the CUDA register function so we pin the memory in the CUDA
+ * space.
+ *
+ * Runs mca_common_cuda_init() first if it has not run yet.  Nothing is
+ * registered when CUDA support or memory registration is disabled.
+ *
+ * @param ptr     start of the host buffer handed to cuMemHostRegister()
+ * @param amount  size of the buffer in bytes
+ * @param msg     label printed in diagnostics; the callers visible in this
+ *                patch pass the mpool component name
+ */
+void mca_common_cuda_register(void *ptr, size_t amount, char *msg) {
+    CUresult res;
+
+    if (!initialized) {
+        mca_common_cuda_init();
+    }
+
+    if (!mca_common_cuda_enabled || !mca_common_cuda_register_memory) {
+        return;
+    }
+
+    res = cuMemHostRegister(ptr, amount, 0);
+    if (CUDA_SUCCESS != res) {
+        /* If registering the memory fails, print a message and continue.
+         * This is not a fatal error.  The help text formats the size and
+         * the return code with %d, so both are passed as int; handing a
+         * size_t through varargs to %d is undefined behavior. */
+        orte_show_help("help-mpi-common-cuda.txt", "cuMemHostRegister failed",
+                       true, ptr, (int) amount, (int) res, msg);
+    } else {
+        opal_output_verbose(20, mca_common_cuda_output,
+                            "CUDA: cuMemHostRegister OK on mpool %s: "
+                            "address=%p, bufsize=%d",
+                            msg, ptr, (int)amount);
+    }
+}
+
+/**
+ * Call the CUDA unregister function so we unpin the memory in the CUDA
+ * space.
+ *
+ * Runs mca_common_cuda_init() first if it has not run yet.  Nothing is
+ * unregistered when CUDA support or memory registration is disabled.
+ *
+ * @param ptr  start of the host buffer handed to cuMemHostUnregister()
+ * @param msg  label printed in diagnostics; the callers visible in this
+ *             patch pass the mpool component name
+ */
+void mca_common_cuda_unregister(void *ptr, char *msg) {
+    CUresult res;
+
+    if (!initialized) {
+        mca_common_cuda_init();
+    }
+
+    if (!mca_common_cuda_enabled || !mca_common_cuda_register_memory) {
+        return;
+    }
+
+    res = cuMemHostUnregister(ptr);
+    if (CUDA_SUCCESS != res) {
+        /* If unregistering the memory fails, print a message and continue.
+         * This is not a fatal error. */
+        orte_show_help("help-mpi-common-cuda.txt", "cuMemHostUnregister failed",
+                       true, ptr, (int) res, msg);
+    } else {
+        opal_output_verbose(20, mca_common_cuda_output,
+                            "CUDA: cuMemHostUnregister OK on mpool %s: "
+                            "address=%p",
+                            msg, ptr);
+    }
+}
diff --git a/ompi/mca/common/cuda/common_cuda.h b/ompi/mca/common/cuda/common_cuda.h
new file mode 100644
index 0000000000..b23efbe934
--- /dev/null
+++ b/ompi/mca/common/cuda/common_cuda.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2006 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2011      NVIDIA Corporation.  All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef OMPI_MCA_COMMON_CUDA_H
+#define OMPI_MCA_COMMON_CUDA_H
+
+OMPI_DECLSPEC void mca_common_cuda_init(void);
+
+OMPI_DECLSPEC void mca_common_cuda_register(void *ptr, size_t amount, char *msg);
+
+OMPI_DECLSPEC void mca_common_cuda_unregister(void *ptr, char *msg);
+
+#endif /* OMPI_MCA_COMMON_CUDA_H */
diff --git a/ompi/mca/common/cuda/configure.m4 b/ompi/mca/common/cuda/configure.m4
new file mode 100644
index 0000000000..60acfe4cf2
--- /dev/null
+++ b/ompi/mca/common/cuda/configure.m4
@@ -0,0 +1,34 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2011      NVIDIA Corporation.  All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+#
+# If CUDA support was requested, then build the CUDA support library.
+# This code checks the variable CUDA_SUPPORT which was set earlier in +# the configure sequence by the opal_configure_options.m4 code. +# + +AC_DEFUN([MCA_ompi_common_cuda_CONFIG],[ + AC_CONFIG_FILES([ompi/mca/common/cuda/Makefile]) + + # Use CUDA_SUPPORT which was filled in by the opal configure code. + AM_CONDITIONAL([MCA_ompi_cuda_support], [test "x$CUDA_SUPPORT" = "x1"]) + AC_DEFINE_UNQUOTED([OMPI_CUDA_SUPPORT],$CUDA_SUPPORT, + [Whether we want cuda memory registration support in OMPI code]) + AS_IF([test "x$CUDA_SUPPORT" = "x1"], + [$1], + [$2]) + + # Copy over the includes and libs needed to build CUDA + common_cuda_CPPFLAGS=$opal_datatype_CPPFLAGS + common_cuda_LIBS=$opal_datatype_LIBS + AC_SUBST([common_cuda_CPPFLAGS]) + AC_SUBST([common_cuda_LIBS]) + +])dnl diff --git a/ompi/mca/common/cuda/help-mpi-common-cuda.txt b/ompi/mca/common/cuda/help-mpi-common-cuda.txt new file mode 100644 index 0000000000..3fcde6a581 --- /dev/null +++ b/ompi/mca/common/cuda/help-mpi-common-cuda.txt @@ -0,0 +1,27 @@ +# -*- text -*- +# +# Copyright (c) 2011 NVIDIA. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +[cuCtxGetCurrent failed] +WARNING: The call to cuCtxGetCurrent() failed while initializing the +CUDA support. Support for CUDA registered memory is disabled. + cuCtxGetCurrent return value: %d + +NOTE: You can turn off this warning by setting the MCA parameter + mpi_common_cuda_warning to 0. +# +[cuMemHostRegister failed] +The call to cuMemHostRegister(%p, %d, 0) failed. + cuMemHostRegister return value: %d + Memory Pool: %s +# +[cuMemHostUnregister failed] +The call to cuMemHostUnregister(%p) failed. + cuMemHostUnregister return value: %d + Memory Pool: %s +# diff --git a/ompi/mca/mpool/mpool.h b/ompi/mca/mpool/mpool.h index 1542389c33..f5f4ea4ba4 100644 --- a/ompi/mca/mpool/mpool.h +++ b/ompi/mca/mpool/mpool.h @@ -11,6 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -36,6 +37,7 @@ #define MCA_MPOOL_FLAGS_MPI_ALLOC_MEM 0x4 #define MCA_MPOOL_FLAGS_INVALID 0x8 #define MCA_MPOOL_FLAGS_SO_MEM 0x10 +#define MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM 0x20 struct mca_mpool_base_resources_t; diff --git a/ompi/mca/mpool/sm/Makefile.am b/ompi/mca/mpool/sm/Makefile.am index 6badd207b8..5e8383e688 100644 --- a/ompi/mca/mpool/sm/Makefile.am +++ b/ompi/mca/mpool/sm/Makefile.am @@ -10,6 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011 NVIDIA Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -45,6 +46,10 @@ mca_mpool_sm_la_SOURCES = $(sources) mca_mpool_sm_la_LDFLAGS = -module -avoid-version mca_mpool_sm_la_LIBADD = \ $(top_ompi_builddir)/ompi/mca/common/sm/libmca_common_sm.la +if MCA_ompi_cuda_support +mca_mpool_sm_la_LIBADD += \ + $(top_ompi_builddir)/ompi/mca/common/cuda/libmca_common_cuda.la +endif noinst_LTLIBRARIES = $(component_noinst) libmca_mpool_sm_la_SOURCES = $(sources) diff --git a/ompi/mca/mpool/sm/mpool_sm_module.c b/ompi/mca/mpool/sm/mpool_sm_module.c index f573f799c3..3319d2621b 100644 --- a/ompi/mca/mpool/sm/mpool_sm_module.c +++ b/ompi/mca/mpool/sm/mpool_sm_module.c @@ -12,6 +12,7 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,6 +24,7 @@ #include #include "ompi/mca/mpool/sm/mpool_sm.h" #include "ompi/mca/common/sm/common_sm.h" +#include "ompi/mca/common/cuda/common_cuda.h" #ifdef HAVE_UNISTD_H #include #endif @@ -95,6 +97,13 @@ void* mca_mpool_sm_alloc( opal_maffinity_base_bind(&mseg, 1, mpool_sm->mem_node); } +#if OPAL_CUDA_SUPPORT + if (flags & MCA_MPOOL_FLAGS_CUDA_REGISTER_MEM) { + mca_common_cuda_register(mseg.mbs_start_addr, size, + mpool->mpool_component->mpool_version.mca_component_name); + } +#endif + return mseg.mbs_start_addr; } diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index 6b00c96ee1..fd510e4b0a 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -394,6 +395,13 @@ mca_pml_ob1_send_request_start_btl( mca_pml_ob1_send_request_t* sendreq, MCA_PML_OB1_HDR_FLAGS_CONTIG); } } else { +#if OPAL_CUDA_SUPPORT + /* Do not send anything with first rendezvous message as copying GPU + * memory into RNDV message is expensive. */ + if (sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) { + size = 0; + } +#endif rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0); } }