1
1

Add some basic support for sending and receiving CUDA device memory. Feature is disabled by default and has no effect on default code paths.

This commit was SVN r24659.
Этот коммит содержится в:
Rolf vandeVaart 2011-04-28 23:05:55 +00:00
родитель 0ff0d20e72
Коммит 2634f6401a
10 изменённых файлов: 379 добавлений и 0 удалений

Просмотреть файл

@ -16,6 +16,7 @@ dnl Copyright (c) 2009 IBM Corporation. All rights reserved.
dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights
dnl reserved.
dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
dnl Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
dnl
dnl $COPYRIGHT$
dnl
@ -564,4 +565,78 @@ fi
AC_DEFINE_UNQUOTED([OPAL_ENABLE_CRDEBUG], [$ompi_want_prd],
[Whether we want checkpoint/restart enabled debugging functionality or not])
#
# Check to see if user wants CUDA support in datatype and convertor code.
#
AC_ARG_WITH([cuda],
[AC_HELP_STRING([--with-cuda(=DIR)],
[Build cuda support, optionally adding DIR/include, DIR/lib, and DIR/lib64])])
AC_MSG_CHECKING([if --with-cuda is set])
# CUDA support is off by default. User has to request it.
AS_IF([test "$with_cuda" = "no" -o "x$with_cuda" = "x"],
[opal_check_cuda_happy="no"
AC_MSG_RESULT([not set (--with-cuda=$with_cuda)])],
[AS_IF([test "$with_cuda" = "yes"],
[AS_IF([test "x`ls /usr/local/cuda/include/cuda.h 2> /dev/null`" = "x"],
[AC_MSG_RESULT([not found in standard location])
AC_MSG_WARN([Expected file /usr/local/cuda/include/cuda.h not found])
AC_MSG_ERROR([Cannot continue])],
[AC_MSG_RESULT([found])
opal_check_cuda_happy="yes"
with_cuda="/usr/local/cuda"])],
[AS_IF([test ! -d "$with_cuda"],
[AC_MSG_RESULT([not found])
AC_MSG_WARN([Directory $with_cuda not found])
AC_MSG_ERROR([Cannot continue])],
[AS_IF([test "x`ls $with_cuda/include/cuda.h 2> /dev/null`" = "x"],
[AC_MSG_RESULT([not found])
AC_MSG_WARN([Expected file $with_cuda/include/cuda.h not found])
AC_MSG_ERROR([Cannot continue])],
[opal_check_cuda_happy="yes"
AC_MSG_RESULT([found ($with_cuda/include/cuda.h)])])])])])
# Check for optional libdir setting
AC_ARG_WITH([cuda-libdir],
[AC_HELP_STRING([--with-cuda-libdir=DIR],
[Search for cuda libraries in DIR])])
AC_MSG_CHECKING([if --with-cuda-libdir is set])
# Only check for the extra cuda libdir if we have passed the --with-cuda tests.
AS_IF([test "$opal_check_cuda_happy" = "yes"],
[AS_IF([test "$with_cuda_libdir" != "yes" -a "$with_cuda_libdir" != "no" -a "x$with_cuda_libdir" != "x"],
[AS_IF([test ! -d "$with_cuda_libdir"],
[AC_MSG_RESULT([not found])
AC_MSG_WARN([Directory $with_cuda_libdir not found])
AC_MSG_ERROR([Cannot continue])],
[AS_IF([test "x`ls $with_cuda_libdir/libcuda.* 2> /dev/null`" = "x"],
[AC_MSG_RESULT([not found])
AC_MSG_WARN([Expected file $with_cuda_libdir/libcuda.* not found])
AC_MSG_ERROR([Cannot continue])],
[AC_MSG_RESULT([ok - found directory ($with_cuda_libdir)])])])],
[with_cuda_libdir=/usr/lib64
AS_IF([test "x`ls $with_cuda_libdir/libcuda.* 2> /dev/null`" = "x"],
[AC_MSG_RESULT([not found])
AC_MSG_WARN([Expected file $with_cuda_libdir/libcuda.* not found])
AC_MSG_ERROR([Cannot continue])],
[AC_MSG_RESULT([ok - found directory ($with_cuda_libdir)])])])],
[AC_MSG_RESULT([not applicable since --with-cuda is not set])])
AC_MSG_CHECKING([if have cuda support])
if test "$opal_check_cuda_happy" = "yes"; then
AC_MSG_RESULT([yes (-I$with_cuda/include -L$with_cuda_libdir -lcuda)])
CUDA_SUPPORT=1
opal_datatype_CPPFLAGS="-I$with_cuda/include"
opal_datatype_LIBS="-L$with_cuda_libdir -lcuda"
AC_SUBST([opal_datatype_CPPFLAGS])
AC_SUBST([opal_datatype_LIBS])
else
AC_MSG_RESULT([no])
CUDA_SUPPORT=0
fi
AM_CONDITIONAL([OPAL_cuda_support], [test "x$CUDA_SUPPORT" = "x1"])
AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT],$CUDA_SUPPORT,
[Whether we want cuda device pointer support])
])dnl

Просмотреть файл

@ -14,6 +14,7 @@
# reserved.
# Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -71,3 +72,11 @@ if WANT_INSTALL_HEADERS
opaldir = $(includedir)/openmpi/$(subdir)
opal_HEADERS = $(headers)
endif
# If we have cuda support, modify file list and flags
if OPAL_cuda_support
libdatatype_la_SOURCES += opal_datatype_cuda.c
headers += opal_datatype_cuda.h
AM_CPPFLAGS = $(opal_datatype_CPPFLAGS)
libdatatype_la_LIBADD += $(opal_datatype_LIBS)
endif

Просмотреть файл

@ -11,6 +11,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -37,6 +38,11 @@
#include "opal/datatype/opal_datatype_checksum.h"
#include "opal/datatype/opal_datatype_prototypes.h"
#include "opal/datatype/opal_convertor_internal.h"
#if OPAL_CUDA_SUPPORT
#include "opal/datatype/opal_datatype_cuda.h"
/* Copy BLENGTH bytes from SRC to DST through the convertor's cbmemcpy
 * callback.  CONVERTOR is parenthesized so that an expression argument
 * (e.g. a conditional) is evaluated as a whole before "->" is applied. */
#define MEMCPY_CUDA( DST, SRC, BLENGTH, CONVERTOR ) \
    (CONVERTOR)->cbmemcpy( (DST), (SRC), (BLENGTH) )
#endif
extern int opal_convertor_create_stack_with_pos_general( opal_convertor_t* convertor,
int starting_point, const int* sizes );
@ -48,6 +54,9 @@ static void opal_convertor_construct( opal_convertor_t* convertor )
convertor->partial_length = 0;
convertor->remoteArch = opal_local_arch;
convertor->flags = OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED;
#if OPAL_CUDA_SUPPORT
convertor->cbmemcpy = &memcpy;
#endif
}
@ -235,7 +244,11 @@ int32_t opal_convertor_pack( opal_convertor_t* pConv,
if( OPAL_LIKELY(NULL == iov[i].iov_base) )
iov[i].iov_base = (IOVBASE_TYPE *) base_pointer;
else
#if OPAL_CUDA_SUPPORT
MEMCPY_CUDA( iov[i].iov_base, base_pointer, iov[i].iov_len, pConv );
#else
MEMCPY( iov[i].iov_base, base_pointer, iov[i].iov_len );
#endif
pending_length -= iov[i].iov_len;
base_pointer += iov[i].iov_len;
}
@ -248,7 +261,11 @@ complete_contiguous_data_pack:
if( OPAL_LIKELY(NULL == iov[i].iov_base) )
iov[i].iov_base = (IOVBASE_TYPE *) base_pointer;
else
#if OPAL_CUDA_SUPPORT
MEMCPY_CUDA( iov[i].iov_base, base_pointer, iov[i].iov_len, pConv );
#else
MEMCPY( iov[i].iov_base, base_pointer, iov[i].iov_len );
#endif
pConv->bConverted = pConv->local_size;
*out_size = i + 1;
pConv->flags |= CONVERTOR_COMPLETED;
@ -282,7 +299,11 @@ int32_t opal_convertor_unpack( opal_convertor_t* pConv,
if( iov[i].iov_len >= pending_length ) {
goto complete_contiguous_data_unpack;
}
#if OPAL_CUDA_SUPPORT
MEMCPY_CUDA( base_pointer, iov[i].iov_base, iov[i].iov_len, pConv );
#else
MEMCPY( base_pointer, iov[i].iov_base, iov[i].iov_len );
#endif
pending_length -= iov[i].iov_len;
base_pointer += iov[i].iov_len;
}
@ -292,7 +313,11 @@ int32_t opal_convertor_unpack( opal_convertor_t* pConv,
complete_contiguous_data_unpack:
iov[i].iov_len = pending_length;
#if OPAL_CUDA_SUPPORT
MEMCPY_CUDA( base_pointer, iov[i].iov_base, iov[i].iov_len, pConv );
#else
MEMCPY( base_pointer, iov[i].iov_base, iov[i].iov_len );
#endif
pConv->bConverted = pConv->local_size;
*out_size = i + 1;
pConv->flags |= CONVERTOR_COMPLETED;
@ -519,6 +544,9 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
/* Here I should check that the data is not overlapping */
convertor->flags |= CONVERTOR_RECV;
#if OPAL_CUDA_SUPPORT
mca_cuda_convertor_init(convertor, pUserBuf);
#endif
OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );
@ -555,6 +583,9 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
const void* pUserBuf )
{
convertor->flags |= CONVERTOR_SEND;
#if OPAL_CUDA_SUPPORT
mca_cuda_convertor_init(convertor, pUserBuf);
#endif
OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );
@ -623,6 +654,9 @@ int opal_convertor_clone( const opal_convertor_t* source,
destination->bConverted = source->bConverted;
destination->stack_pos = source->stack_pos;
}
#if OPAL_CUDA_SUPPORT
destination->cbmemcpy = source->cbmemcpy;
#endif
return OPAL_SUCCESS;
}

Просмотреть файл

@ -34,6 +34,9 @@
#ifdef HAVE_NET_UIO_H
#include <net/uio.h>
#endif
#if HAVE_STRING_H
#include <string.h>
#endif
#include "opal/constants.h"
#include "opal/datatype/opal_datatype.h"
@ -51,6 +54,7 @@ BEGIN_C_DECLS
#define CONVERTOR_HOMOGENEOUS 0x00080000
#define CONVERTOR_NO_OP 0x00100000
#define CONVERTOR_WITH_CHECKSUM 0x00200000
#define CONVERTOR_CUDA 0x00400000
#define CONVERTOR_TYPE_MASK 0x00FF0000
#define CONVERTOR_STATE_START 0x01000000
#define CONVERTOR_STATE_COMPLETE 0x02000000
@ -65,6 +69,7 @@ typedef int32_t (*convertor_advance_fct_t)( opal_convertor_t* pConvertor,
uint32_t* out_size,
size_t* max_data );
typedef void*(*memalloc_fct_t)( size_t* pLength, void* userdata );
typedef void*(*memcpy_fct_t)( void* dest, const void* src, size_t n );
/* The master convertor struct (defined in convertor_internal.h) */
struct opal_convertor_master_t;
@ -109,6 +114,9 @@ struct opal_convertor_t {
dt_stack_t static_stack[DT_STATIC_STACK_SIZE]; /**< local stack for small datatypes */
/* --- cacheline 3 boundary (192 bytes) was 56 bytes ago --- */
#if OPAL_CUDA_SUPPORT
memcpy_fct_t cbmemcpy; /**< memcpy or cuMemcpy */
#endif
/* size: 248, cachelines: 4, members: 20 */
/* last cacheline: 56 bytes */
};
@ -156,6 +164,9 @@ static inline int opal_convertor_cleanup( opal_convertor_t* convertor )
convertor->pStack = convertor->static_stack;
convertor->stack_size = DT_STATIC_STACK_SIZE;
}
#if OPAL_CUDA_SUPPORT
convertor->cbmemcpy = &memcpy;
#endif
convertor->pDesc = NULL;
convertor->stack_pos = 0;
convertor->flags = OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED;
@ -176,6 +187,9 @@ static inline int32_t opal_convertor_need_buffers( const opal_convertor_t* pConv
{
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if (OPAL_UNLIKELY(0 == (pConvertor->flags & CONVERTOR_HOMOGENEOUS))) return 1;
#endif
#if OPAL_CUDA_SUPPORT
if( pConvertor->flags & CONVERTOR_CUDA ) return 1;
#endif
if( pConvertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS ) return 0;
if( (pConvertor->count == 1) && (pConvertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) return 0;

Просмотреть файл

@ -11,6 +11,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -71,6 +72,31 @@ static size_t opal_datatype_memop_block_size = 128 * 1024;
#define MEM_OP MEMMOVE
#include "opal_datatype_copy.h"
#if OPAL_CUDA_SUPPORT
#include "opal_datatype_cuda.h"
#undef MEM_OP_NAME
#define MEM_OP_NAME non_overlap_cuda
#undef MEM_OP
#define MEM_OP opal_cuda_memcpy
#include "opal_datatype_copy.h"
#undef MEM_OP_NAME
#define MEM_OP_NAME overlap_cuda
#undef MEM_OP
#define MEM_OP opal_cuda_memmove
#include "opal_datatype_copy.h"
/* When cuda_device_bufs compares equal to true, overwrite fct with
 * copy_function; otherwise leave fct untouched. */
#define SET_CUDA_COPY_FCT(cuda_device_bufs, fct, copy_function) \
    do {                                                        \
        if ((cuda_device_bufs) == true) {                       \
            (fct) = (copy_function);                            \
        }                                                       \
    } while(0)
#else
#define SET_CUDA_COPY_FCT(cuda_device_bufs, fct, copy_function)
#endif
int32_t opal_datatype_copy_content_same_ddt( const opal_datatype_t* datatype, int32_t count,
char* destination_base, char* source_base )
{
@ -78,6 +104,10 @@ int32_t opal_datatype_copy_content_same_ddt( const opal_datatype_t* datatype, in
size_t iov_len_local;
int32_t (*fct)( const opal_datatype_t*, int32_t, char*, char*);
#if OPAL_CUDA_SUPPORT
bool cuda_device_bufs = opal_cuda_check_bufs(destination_base, source_base);
#endif
DO_DEBUG( opal_output( 0, "opal_datatype_copy_content_same_ddt( %p, %d, dst %p, src %p )\n",
(void*)datatype, count, destination_base, source_base ); );
@ -95,15 +125,18 @@ int32_t opal_datatype_copy_content_same_ddt( const opal_datatype_t* datatype, in
extent = (datatype->true_ub - datatype->true_lb) + (count - 1) * (datatype->ub - datatype->lb);
fct = non_overlap_copy_content_same_ddt;
SET_CUDA_COPY_FCT(cuda_device_bufs, fct, non_overlap_cuda_copy_content_same_ddt);
if( destination_base < source_base ) {
if( (destination_base + extent) > source_base ) {
/* memmove */
fct = overlap_copy_content_same_ddt;
SET_CUDA_COPY_FCT(cuda_device_bufs, fct, overlap_cuda_copy_content_same_ddt);
}
} else {
if( (source_base + extent) > destination_base ) {
/* memmove */
fct = overlap_copy_content_same_ddt;
SET_CUDA_COPY_FCT(cuda_device_bufs, fct, overlap_cuda_copy_content_same_ddt);
}
}
return fct( datatype, count, destination_base, source_base );

154
opal/datatype/opal_datatype_cuda.c Исполняемый файл
Просмотреть файл

@ -0,0 +1,154 @@
/*
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <errno.h>
#include <unistd.h>
#include <cuda.h>
#include "opal/align.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/show_help.h"
#include "opal/datatype/opal_convertor.h"
#include "opal/datatype/opal_datatype_cuda.h"
/* Set to true at the end of opal_cuda_support_init(); used to make the
 * one-time setup run only once. */
static bool initialized = false;
/* Verbosity level for CUDA-related output; its address is handed to
 * mca_base_param_reg_int_name() for the "opal" / "cuda_verbose" parameter. */
static int opal_cuda_verbose;
/* Output stream id; replaced by the result of opal_output_open() during
 * opal_cuda_support_init(). */
static int opal_cuda_output = 0;
/* Forward declaration: the definition follows its first caller in this file. */
static void opal_cuda_support_init(void);
/*
 * Inspect the user buffer attached to a convertor and, when it is CUDA
 * device memory, switch the convertor over to the CUDA copy routine.
 *
 * @param convertor  Convertor whose cbmemcpy callback and flags may be updated
 * @param pUserBuf   User buffer whose memory type is queried
 *
 * The convertor is left untouched when the memory type query fails or
 * reports host memory.
 */
void mca_cuda_convertor_init(opal_convertor_t* convertor, const void *pUserBuf)
{
    CUmemorytype mem_kind;
    CUresult status;

    /* Lazily perform the one-time CUDA support setup. */
    if (!initialized) {
        opal_cuda_support_init();
    }

    status = cuPointerGetAttribute(&mem_kind,
                                   CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
                                   (CUdeviceptr)pUserBuf);

    /* A failed query is treated as "not a device pointer"; host memory
     * needs no special handling either.  mem_kind is only examined when
     * the query succeeded. */
    if (CUDA_SUCCESS != status || CU_MEMORYTYPE_HOST == mem_kind) {
        return;
    }

    /* Anything else is expected to be a device pointer. */
    assert(CU_MEMORYTYPE_DEVICE == mem_kind);
    convertor->cbmemcpy = (memcpy_fct_t)&opal_cuda_memcpy;
    convertor->flags |= CONVERTOR_CUDA;
}
/* Checks the type of pointer
*
* @param dest One pointer to check
* @param source Another pointer to check
*/
bool opal_cuda_check_bufs(char *dest, char *src)
{
int res;
CUmemorytype memType;
res = cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)dest);
if( memType == CU_MEMORYTYPE_DEVICE){
return true;
}
res = cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)src);
if( memType == CU_MEMORYTYPE_DEVICE){
return true;
}
/* Assuming it is a host pointer for all other situations */
return false;
}
/*
 * Thin wrapper around cuMemcpy with a memcpy-like shape, so the result of
 * the CUDA call can be inspected.  A failed copy is reported and the
 * process aborts, since nothing can be recovered at this point.
 *
 * @param dest  Destination address
 * @param src   Source address
 * @param size  Number of bytes to copy
 *
 * @return dest on success; does not return on failure.
 */
void *opal_cuda_memcpy(void *dest, void *src, size_t size)
{
    const int status = cuMemcpy((CUdeviceptr)dest, (CUdeviceptr)src, size);

    if (CUDA_SUCCESS == status) {
        return dest;
    }

    opal_output(0, "CUDA: Error in cuMemcpy: res=%d, dest=%p, src=%p, size=%d",
                status, dest, src, (int)size);
    abort();
}
/*
* In some cases, need an implementation of memmove. This is not fast, but
* it is not often needed.
*/
void *opal_cuda_memmove(void *dest, void *src, size_t size)
{
CUdeviceptr tmp;
int res;
res = cuMemAlloc(&tmp,size);
res = cuMemcpy(tmp, (CUdeviceptr) src, size);
if(res != CUDA_SUCCESS){
opal_output(0, "CUDA: memmove-Error in cuMemcpy: res=%d, dest=%p, src=%p, size=%d",
res, (void *)tmp, src, (int)size);
abort();
}
res = cuMemcpy((CUdeviceptr) dest, tmp, size);
if(res != CUDA_SUCCESS){
opal_output(0, "CUDA: memmove-Error in cuMemcpy: res=%d, dest=%p, src=%p, size=%d",
res, dest, (void *)tmp, (int)size);
abort();
}
cuMemFree(tmp);
return dest;
}
/**
 * One-time setup for the CUDA support code: registers the verbosity
 * parameter, opens an output stream, and reports (in verbose mode) whether
 * a CUDA context is current for this process.  Subsequent calls return
 * immediately.
 */
static void opal_cuda_support_init(void)
{
    CUcontext current_ctx;
    CUresult ctx_status;

    if (initialized) {
        return;
    }

    /* Register the verbosity parameter; the returned id is not needed. */
    (void) mca_base_param_reg_int_name("opal", "cuda_verbose",
                                       "Set level of opal cuda verbosity",
                                       false, false, 0, &opal_cuda_verbose);
    opal_cuda_output = opal_output_open(NULL);
    opal_output_set_verbosity(opal_cuda_output, opal_cuda_verbose);

    /* Probe for a current CUDA context.  The result only influences the
     * debugging message below; initialization completes either way. */
    ctx_status = cuCtxGetCurrent(&current_ctx);
    if (CUDA_SUCCESS == ctx_status) {
        opal_output_verbose(10, opal_cuda_output,
                            "CUDA: cuCtxGetCurrent succeeded, CUDA device pointers will work");
    } else {
        opal_output_verbose(10, opal_cuda_output,
                            "CUDA: cuCtxGetCurrent failed, CUDA device pointers will not work");
    }

    initialized = true;
}

20
opal/datatype/opal_datatype_cuda.h Исполняемый файл
Просмотреть файл

@ -0,0 +1,20 @@
/*
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/* Public interface of the CUDA helpers used by the datatype/convertor code. */
/* NOTE(review): only cuda.h is included here, so opal_convertor_t, bool and
 * size_t must already be declared by whoever includes this header. */
#ifndef _OPAL_DATATYPE_CUDA_H
#define _OPAL_DATATYPE_CUDA_H
#include "cuda.h"
/* Query the memory type of pUserBuf; for a device pointer, set the
 * convertor's cbmemcpy to opal_cuda_memcpy and raise CONVERTOR_CUDA. */
void mca_cuda_convertor_init(opal_convertor_t* convertor, const void *pUserBuf);
/* Return true if either dest or src is reported as CUDA device memory. */
bool opal_cuda_check_bufs(char *dest, char *src);
/* cuMemcpy wrapper with a memcpy-like signature; returns dest, aborts on error. */
void* opal_cuda_memcpy(void * dest, void * src, size_t size);
/* memmove-like copy staged through a temporary device buffer; returns dest. */
void* opal_cuda_memmove(void * dest, void * src, size_t size);
#endif

Просмотреть файл

@ -4,6 +4,7 @@
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -21,6 +22,13 @@
#include <stdint.h>
#endif
#if !defined(CHECKSUM) && OPAL_CUDA_SUPPORT
/* Make use of existing macro to do CUDA style memcpy */
#undef MEMCPY_CSUM
/* Route the copy through the convertor's cbmemcpy callback.  CONVERTOR is
 * parenthesized so an expression argument binds correctly to "->". */
#define MEMCPY_CSUM( DST, SRC, BLENGTH, CONVERTOR ) \
    (CONVERTOR)->cbmemcpy( (DST), (SRC), (BLENGTH) )
#endif
static inline void pack_predefined_data( opal_convertor_t* CONVERTOR,
dt_elem_desc_t* ELEM,
uint32_t* COUNT,

Просмотреть файл

@ -11,6 +11,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -318,8 +319,16 @@ opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pEle
memset( temporary, unused_byte, data_length );
MEMCPY( temporary + start_position, partial_data, (end_position - start_position) );
#if OPAL_CUDA_SUPPORT
/* In the case where the data is being unpacked from device
* memory, need to use the special host to device memory copy.
* Note this code path was only seen on large receives of
* noncontiguous data via buffered sends. */
pConvertor->cbmemcpy(saved_data, real_data, data_length );
#else
/* Save the content of the user memory */
MEMCPY( saved_data, real_data, data_length );
#endif
/* Then unpack the data into the user memory */
UNPACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
@ -331,10 +340,25 @@ opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pEle
/* For every occurrence of the unused byte move data from the saved
* buffer back into the user memory.
*/
#if OPAL_CUDA_SUPPORT
/* Need to copy the modified real_data again so we can see which
* bytes need to be converted back to their original values. Note
* this code path was only seen on large receives of noncontiguous
* data via buffered sends. */
{
char resaved_data[16];
pConvertor->cbmemcpy(resaved_data, real_data, data_length );
for( i = 0; i < data_length; i++ ) {
if( unused_byte == resaved_data[i] )
pConvertor->cbmemcpy(&real_data[i], &saved_data[i], 1);
}
}
#else
for( i = 0; i < data_length; i++ ) {
if( unused_byte == real_data[i] )
real_data[i] = saved_data[i];
}
#endif
return 0;
}

Просмотреть файл

@ -4,6 +4,7 @@
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -21,6 +22,13 @@
#include <stdint.h>
#endif
#if !defined(CHECKSUM) && OPAL_CUDA_SUPPORT
/* Make use of existing macro to do CUDA style memcpy */
#undef MEMCPY_CSUM
/* Route the copy through the convertor's cbmemcpy callback.  CONVERTOR is
 * parenthesized so an expression argument binds correctly to "->". */
#define MEMCPY_CSUM( DST, SRC, BLENGTH, CONVERTOR ) \
    (CONVERTOR)->cbmemcpy( (DST), (SRC), (BLENGTH) )
#endif
#include "opal/datatype/opal_convertor.h"