Small change to allow CUDA-aware support to work with non-reduction nonblocking collectives.

Only used when the CUDA-aware feature is compiled in. This commit was SVN r32750.
2014-09-17 16:55:01 +00:00 · 2014-09-17 16:55:01 +00:00 · 8db1f89dd1
--- a/ompi/mca/coll/libnbc/nbc_ialltoall.c
+++ b/ompi/mca/coll/libnbc/nbc_ialltoall.c
@ -6,6 +6,7 @@
 *                         rights reserved.
 * Copyright (c) 2013      Los Alamos National Security, LLC. All rights
 *                         reserved.
+ * Copyright (c) 2014      NVIDIA Corporation.  All rights reserved.
 *
 * Author(s): Torsten Hoefler <htor@cs.indiana.edu>
 *
@ -108,7 +109,11 @@ int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendty
    }

    /* phase 1 - rotate n data blocks upwards into the tmpbuffer */
+#if OPAL_CUDA_SUPPORT
+    if(NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
+#else
    if(NBC_Type_intrinsic(sendtype)) {
+#endif /* OPAL_CUDA_SUPPORT */
      /* contiguous - just copy (1st copy) */
      memcpy(handle->tmpbuf, (char*)sendbuf+datasize*rank, datasize*(p-rank));
      if(rank != 0) memcpy((char*)handle->tmpbuf+datasize*(p-rank), sendbuf, datasize*(rank));
--- a/ompi/mca/coll/libnbc/nbc_internal.h
+++ b/ompi/mca/coll/libnbc/nbc_internal.h
@ -8,6 +8,7 @@
 * Author(s): Torsten Hoefler <htor@cs.indiana.edu>
 *
 * Copyright (c) 2012      Oracle and/or its affiliates.  All rights reserved.
+ * Copyright (c) 2014      NVIDIA Corporation.  All rights reserved.
 *
 */
 #ifndef __NBC_INTERNAL_H__
@ -20,6 +21,10 @@
 #include "mpi.h"

 #include "coll_libnbc.h"
+#if OPAL_CUDA_SUPPORT
+#include "opal/datatype/opal_convertor.h"
+#include "opal/datatype/opal_datatype_cuda.h"
+#endif /* OPAL_CUDA_SUPPORT */
 #include "ompi/include/ompi/constants.h"
 #include "ompi/request/request.h"
 #include "ompi/datatype/ompi_datatype.h"
@ -483,7 +488,11 @@ static inline int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void *
  OPAL_PTRDIFF_TYPE ext, lb;
  void *packbuf;

+#if OPAL_CUDA_SUPPORT
+  if((srctype == tgttype) && NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
+#else
  if((srctype == tgttype) && NBC_Type_intrinsic(srctype)) {
+#endif /* OPAL_CUDA_SUPPORT */
    /* if we have the same types and they are contiguous (intrinsic
     * types are contiguous), we can just use a single memcpy */
    res = ompi_datatype_get_extent(srctype, &lb, &ext);
@ -511,7 +520,11 @@ static inline int NBC_Unpack(void *src, int srccount, MPI_Datatype srctype, void
  int size, pos, res;
  OPAL_PTRDIFF_TYPE ext, lb;

+#if OPAL_CUDA_SUPPORT
+  if(NBC_Type_intrinsic(srctype) && !(opal_cuda_check_bufs((char *)tgt, (char *)src))) {
+#else
  if(NBC_Type_intrinsic(srctype)) {
+#endif /* OPAL_CUDA_SUPPORT */
    /* if we have the same types and they are contiguous (intrinsic
     * types are contiguous), we can just use a single memcpy */
    res = ompi_datatype_get_extent (srctype, &lb, &ext);