Merge pull request #4143 from aravindksg/psm2_cuda
Add support for GPU buffers for PSM2 MTL
Этот коммит содержится в:
Коммит
c1ce233eaf
@ -5,6 +5,7 @@
|
||||
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2017 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -61,6 +62,9 @@ typedef struct mca_mtl_request_t mca_mtl_request_t;
|
||||
* MTL module flags
|
||||
*/
|
||||
#define MCA_MTL_BASE_FLAG_REQUIRE_WORLD 0x00000001
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
/* Set by an MTL in its mtl_flags; the CM PML tests this bit before
 * requesting that CUDA convertor initialization be skipped. */
#define MCA_MTL_BASE_FLAG_CUDA_INIT_DISABLE 0x00000002
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Initialization routine for MTL component
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
@ -100,6 +100,9 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
|
||||
char *generated_key;
|
||||
char env_string[256];
|
||||
int rc;
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
char *cuda_env;
|
||||
#endif
|
||||
|
||||
generated_key = getenv(OPAL_MCA_PREFIX"orte_precondition_transports");
|
||||
memset(uu, 0, sizeof(psm2_uuid_t));
|
||||
@ -173,6 +176,15 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
|
||||
/* register the psm2 progress function */
|
||||
opal_progress_register(ompi_mtl_psm2_progress);
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
ompi_mtl_psm2.super.mtl_flags |= MCA_MTL_BASE_FLAG_CUDA_INIT_DISABLE;
|
||||
|
||||
cuda_env = getenv("PSM2_CUDA");
|
||||
if (!cuda_env || ( strcmp(cuda_env, "0") == 0) )
|
||||
opal_output(0, "Warning: If running with device buffers, there is a"
|
||||
" chance the application might fail. Try setting PSM2_CUDA=1.\n");
|
||||
#endif
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -79,6 +80,7 @@ mca_pml_cm_irecv_init(void *addr,
|
||||
struct ompi_request_t **request)
|
||||
{
|
||||
mca_pml_cm_hvy_recv_request_t *recvreq;
|
||||
uint32_t flags = 0;
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
ompi_proc_t* ompi_proc;
|
||||
#endif
|
||||
@ -87,7 +89,7 @@ mca_pml_cm_irecv_init(void *addr,
|
||||
if( OPAL_UNLIKELY(NULL == recvreq) ) return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
MCA_PML_CM_HVY_RECV_REQUEST_INIT(recvreq, ompi_proc, comm, tag, src,
|
||||
datatype, addr, count, true);
|
||||
datatype, addr, count, flags, true);
|
||||
|
||||
*request = (ompi_request_t*) recvreq;
|
||||
|
||||
@ -104,6 +106,7 @@ mca_pml_cm_irecv(void *addr,
|
||||
struct ompi_request_t **request)
|
||||
{
|
||||
int ret;
|
||||
uint32_t flags = 0;
|
||||
mca_pml_cm_thin_recv_request_t *recvreq;
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
ompi_proc_t* ompi_proc = NULL;
|
||||
@ -118,7 +121,8 @@ mca_pml_cm_irecv(void *addr,
|
||||
src,
|
||||
datatype,
|
||||
addr,
|
||||
count);
|
||||
count,
|
||||
flags);
|
||||
|
||||
MCA_PML_CM_THIN_RECV_REQUEST_START(recvreq, comm, tag, src, ret);
|
||||
|
||||
@ -145,6 +149,7 @@ mca_pml_cm_recv(void *addr,
|
||||
ompi_status_public_t * status)
|
||||
{
|
||||
int ret;
|
||||
uint32_t flags = 0;
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
ompi_proc_t *ompi_proc;
|
||||
#endif
|
||||
@ -173,20 +178,24 @@ mca_pml_cm_recv(void *addr,
|
||||
ompi_proc = ompi_comm_peer_lookup( comm, src );
|
||||
}
|
||||
|
||||
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count);
|
||||
|
||||
opal_convertor_copy_and_prepare_for_recv(
|
||||
ompi_proc->super.proc_convertor,
|
||||
&(datatype->super),
|
||||
count,
|
||||
addr,
|
||||
0,
|
||||
flags,
|
||||
&convertor );
|
||||
#else
|
||||
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count);
|
||||
|
||||
opal_convertor_copy_and_prepare_for_recv(
|
||||
ompi_mpi_local_convertor,
|
||||
&(datatype->super),
|
||||
count,
|
||||
addr,
|
||||
0,
|
||||
flags,
|
||||
&convertor );
|
||||
#endif
|
||||
|
||||
@ -222,6 +231,7 @@ mca_pml_cm_isend_init(const void* buf,
|
||||
ompi_request_t** request)
|
||||
{
|
||||
mca_pml_cm_hvy_send_request_t *sendreq;
|
||||
uint32_t flags = 0;
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
ompi_proc_t* ompi_proc;
|
||||
#endif
|
||||
@ -230,7 +240,7 @@ mca_pml_cm_isend_init(const void* buf,
|
||||
if (OPAL_UNLIKELY(NULL == sendreq)) return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
MCA_PML_CM_HVY_SEND_REQUEST_INIT(sendreq, ompi_proc, comm, tag, dst,
|
||||
datatype, sendmode, true, false, buf, count);
|
||||
datatype, sendmode, true, false, buf, count, flags);
|
||||
|
||||
/* Work around a leak in start by marking this request as complete. The
|
||||
* problem occurred because we do not have a way to differentiate an
|
||||
@ -254,6 +264,7 @@ mca_pml_cm_isend(const void* buf,
|
||||
ompi_request_t** request)
|
||||
{
|
||||
int ret;
|
||||
uint32_t flags = 0;
|
||||
|
||||
if(sendmode == MCA_PML_BASE_SEND_BUFFERED ) {
|
||||
mca_pml_cm_hvy_send_request_t* sendreq;
|
||||
@ -274,7 +285,8 @@ mca_pml_cm_isend(const void* buf,
|
||||
false,
|
||||
false,
|
||||
buf,
|
||||
count);
|
||||
count,
|
||||
flags);
|
||||
|
||||
MCA_PML_CM_HVY_SEND_REQUEST_START( sendreq, ret);
|
||||
|
||||
@ -296,7 +308,8 @@ mca_pml_cm_isend(const void* buf,
|
||||
datatype,
|
||||
sendmode,
|
||||
buf,
|
||||
count);
|
||||
count,
|
||||
flags);
|
||||
|
||||
MCA_PML_CM_THIN_SEND_REQUEST_START(
|
||||
sendreq,
|
||||
@ -324,6 +337,7 @@ mca_pml_cm_send(const void *buf,
|
||||
ompi_communicator_t* comm)
|
||||
{
|
||||
int ret = OMPI_ERROR;
|
||||
uint32_t flags = 0;
|
||||
ompi_proc_t * ompi_proc;
|
||||
|
||||
if(sendmode == MCA_PML_BASE_SEND_BUFFERED) {
|
||||
@ -342,7 +356,8 @@ mca_pml_cm_send(const void *buf,
|
||||
false,
|
||||
false,
|
||||
buf,
|
||||
count);
|
||||
count,
|
||||
flags);
|
||||
MCA_PML_CM_HVY_SEND_REQUEST_START(sendreq, ret);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
MCA_PML_CM_HVY_SEND_REQUEST_RETURN(sendreq);
|
||||
@ -368,9 +383,12 @@ mca_pml_cm_send(const void *buf,
|
||||
#endif
|
||||
{
|
||||
ompi_proc = ompi_comm_peer_lookup(comm, dst);
|
||||
|
||||
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count);
|
||||
|
||||
opal_convertor_copy_and_prepare_for_send(
|
||||
ompi_proc->super.proc_convertor,
|
||||
&datatype->super, count, buf, 0,
|
||||
&datatype->super, count, buf, flags,
|
||||
&convertor);
|
||||
}
|
||||
|
||||
@ -459,6 +477,7 @@ mca_pml_cm_imrecv(void *buf,
|
||||
struct ompi_request_t **request)
|
||||
{
|
||||
int ret;
|
||||
uint32_t flags = 0;
|
||||
mca_pml_cm_thin_recv_request_t *recvreq;
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
ompi_proc_t* ompi_proc;
|
||||
@ -474,7 +493,8 @@ mca_pml_cm_imrecv(void *buf,
|
||||
(*message)->peer,
|
||||
datatype,
|
||||
buf,
|
||||
count);
|
||||
count,
|
||||
flags);
|
||||
|
||||
MCA_PML_CM_THIN_RECV_REQUEST_MATCHED_START(recvreq, message, ret);
|
||||
|
||||
@ -491,6 +511,7 @@ mca_pml_cm_mrecv(void *buf,
|
||||
ompi_status_public_t* status)
|
||||
{
|
||||
int ret;
|
||||
uint32_t flags = 0;
|
||||
mca_pml_cm_thin_recv_request_t *recvreq;
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
ompi_proc_t* ompi_proc;
|
||||
@ -506,7 +527,8 @@ mca_pml_cm_mrecv(void *buf,
|
||||
(*message)->peer,
|
||||
datatype,
|
||||
buf,
|
||||
count);
|
||||
count,
|
||||
flags);
|
||||
|
||||
MCA_PML_CM_THIN_RECV_REQUEST_MATCHED_START(recvreq,
|
||||
message, ret);
|
||||
|
@ -13,6 +13,7 @@
|
||||
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2017 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -92,7 +93,8 @@ do { \
|
||||
src, \
|
||||
datatype, \
|
||||
addr, \
|
||||
count ) \
|
||||
count, \
|
||||
flags ) \
|
||||
do { \
|
||||
OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, false); \
|
||||
(request)->req_base.req_ompi.req_mpi_object.comm = comm; \
|
||||
@ -108,12 +110,13 @@ do { \
|
||||
} else { \
|
||||
ompi_proc = ompi_comm_peer_lookup( comm, src ); \
|
||||
} \
|
||||
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
|
||||
opal_convertor_copy_and_prepare_for_recv( \
|
||||
ompi_proc->super.proc_convertor, \
|
||||
&(datatype->super), \
|
||||
count, \
|
||||
addr, \
|
||||
0, \
|
||||
flags, \
|
||||
&(request)->req_base.req_convertor ); \
|
||||
} while(0)
|
||||
#else
|
||||
@ -123,7 +126,8 @@ do { \
|
||||
src, \
|
||||
datatype, \
|
||||
addr, \
|
||||
count ) \
|
||||
count, \
|
||||
flags ) \
|
||||
do { \
|
||||
OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, false); \
|
||||
(request)->req_base.req_ompi.req_mpi_object.comm = comm; \
|
||||
@ -134,12 +138,13 @@ do { \
|
||||
OBJ_RETAIN(comm); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
\
|
||||
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
|
||||
opal_convertor_copy_and_prepare_for_recv( \
|
||||
ompi_mpi_local_convertor, \
|
||||
&(datatype->super), \
|
||||
count, \
|
||||
addr, \
|
||||
0, \
|
||||
flags, \
|
||||
&(request)->req_base.req_convertor ); \
|
||||
} while(0)
|
||||
#endif
|
||||
@ -153,6 +158,7 @@ do { \
|
||||
datatype, \
|
||||
addr, \
|
||||
count, \
|
||||
flags, \
|
||||
persistent) \
|
||||
do { \
|
||||
OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, persistent); \
|
||||
@ -173,12 +179,13 @@ do { \
|
||||
} else { \
|
||||
ompi_proc = ompi_comm_peer_lookup( comm, src ); \
|
||||
} \
|
||||
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
|
||||
opal_convertor_copy_and_prepare_for_recv( \
|
||||
ompi_proc->super.proc_convertor, \
|
||||
&(datatype->super), \
|
||||
count, \
|
||||
addr, \
|
||||
0, \
|
||||
flags, \
|
||||
&(request)->req_base.req_convertor ); \
|
||||
} while(0)
|
||||
#else
|
||||
@ -190,6 +197,7 @@ do { \
|
||||
datatype, \
|
||||
addr, \
|
||||
count, \
|
||||
flags, \
|
||||
persistent) \
|
||||
do { \
|
||||
OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, persistent); \
|
||||
@ -205,12 +213,13 @@ do { \
|
||||
OBJ_RETAIN(comm); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
\
|
||||
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
|
||||
opal_convertor_copy_and_prepare_for_recv( \
|
||||
ompi_mpi_local_convertor, \
|
||||
&(datatype->super), \
|
||||
count, \
|
||||
addr, \
|
||||
0, \
|
||||
flags, \
|
||||
&(request)->req_base.req_convertor ); \
|
||||
} while(0)
|
||||
#endif
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2017 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -53,4 +54,20 @@ struct mca_pml_cm_request_t {
|
||||
typedef struct mca_pml_cm_request_t mca_pml_cm_request_t;
|
||||
OBJ_CLASS_DECLARATION(mca_pml_cm_request_t);
|
||||
|
||||
/*
 * Avoid CUDA convertor inits only for contiguous memory and if indicated by
 * the MTL. For non-contiguous memory, do not skip CUDA convertor init phases.
 *
 * When the (datatype, count) pair has a contiguous memory layout and the
 * selected MTL has MCA_MTL_BASE_FLAG_CUDA_INIT_DISABLE set in its mtl_flags,
 * CONVERTOR_SKIP_CUDA_INIT is OR-ed into `flags`; otherwise `flags` is left
 * untouched.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and must be terminated by a semicolon (safe inside
 * an unbraced if/else). Arguments are parenthesized so that arbitrary
 * expressions may be passed.
 */
#if OPAL_CUDA_SUPPORT
#define MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count)            \
    do {                                                                        \
        if (opal_datatype_is_contiguous_memory_layout(&(datatype)->super,       \
                                                      (count))                  \
            && (ompi_mtl->mtl_flags & MCA_MTL_BASE_FLAG_CUDA_INIT_DISABLE)) {   \
            (flags) |= CONVERTOR_SKIP_CUDA_INIT;                                \
        }                                                                       \
    } while (0)
#else
/* Without CUDA support there is nothing to skip: expand to a no-op statement. */
#define MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count)            \
    do { } while (0)
#endif
|
||||
|
||||
#endif
|
||||
|
@ -14,6 +14,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -125,18 +126,20 @@ do { \
|
||||
datatype, \
|
||||
sendmode, \
|
||||
buf, \
|
||||
count) \
|
||||
count, \
|
||||
flags ) \
|
||||
{ \
|
||||
OBJ_RETAIN(comm); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
(req_send)->req_base.req_comm = comm; \
|
||||
(req_send)->req_base.req_datatype = datatype; \
|
||||
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
|
||||
opal_convertor_copy_and_prepare_for_send( \
|
||||
ompi_proc->super.proc_convertor, \
|
||||
&(datatype->super), \
|
||||
count, \
|
||||
buf, \
|
||||
0, \
|
||||
flags, \
|
||||
&(req_send)->req_base.req_convertor ); \
|
||||
(req_send)->req_base.req_ompi.req_mpi_object.comm = comm; \
|
||||
(req_send)->req_base.req_ompi.req_status.MPI_SOURCE = \
|
||||
@ -154,18 +157,20 @@ do { \
|
||||
datatype, \
|
||||
sendmode, \
|
||||
buf, \
|
||||
count) \
|
||||
count, \
|
||||
flags ) \
|
||||
{ \
|
||||
OBJ_RETAIN(comm); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
(req_send)->req_base.req_comm = comm; \
|
||||
(req_send)->req_base.req_datatype = datatype; \
|
||||
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
|
||||
opal_convertor_copy_and_prepare_for_send( \
|
||||
ompi_mpi_local_convertor, \
|
||||
&(datatype->super), \
|
||||
count, \
|
||||
buf, \
|
||||
0, \
|
||||
flags, \
|
||||
&(req_send)->req_base.req_convertor ); \
|
||||
(req_send)->req_base.req_ompi.req_mpi_object.comm = comm; \
|
||||
(req_send)->req_base.req_ompi.req_status.MPI_SOURCE = \
|
||||
@ -185,18 +190,20 @@ do { \
|
||||
datatype, \
|
||||
sendmode, \
|
||||
buf, \
|
||||
count) \
|
||||
count, \
|
||||
flags ) \
|
||||
{ \
|
||||
OBJ_RETAIN(comm); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
(req_send)->req_base.req_comm = comm; \
|
||||
(req_send)->req_base.req_datatype = datatype; \
|
||||
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
|
||||
opal_convertor_copy_and_prepare_for_send( \
|
||||
ompi_proc->super.proc_convertor, \
|
||||
&(datatype->super), \
|
||||
count, \
|
||||
buf, \
|
||||
0, \
|
||||
flags, \
|
||||
&(req_send)->req_base.req_convertor ); \
|
||||
(req_send)->req_base.req_ompi.req_mpi_object.comm = comm; \
|
||||
(req_send)->req_base.req_ompi.req_status.MPI_SOURCE = \
|
||||
@ -215,7 +222,8 @@ do { \
|
||||
datatype, \
|
||||
sendmode, \
|
||||
buf, \
|
||||
count) \
|
||||
count, \
|
||||
flags ) \
|
||||
{ \
|
||||
OBJ_RETAIN(comm); \
|
||||
OMPI_DATATYPE_RETAIN(datatype); \
|
||||
@ -235,12 +243,13 @@ do { \
|
||||
(req_send)->req_base.req_convertor.count = count; \
|
||||
(req_send)->req_base.req_convertor.pDesc = &datatype->super; \
|
||||
} else { \
|
||||
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
|
||||
opal_convertor_copy_and_prepare_for_send( \
|
||||
ompi_mpi_local_convertor, \
|
||||
&(datatype->super), \
|
||||
count, \
|
||||
buf, \
|
||||
0, \
|
||||
flags, \
|
||||
&(req_send)->req_base.req_convertor ); \
|
||||
} \
|
||||
(req_send)->req_base.req_ompi.req_mpi_object.comm = comm; \
|
||||
@ -263,7 +272,8 @@ do { \
|
||||
persistent, \
|
||||
blocking, \
|
||||
buf, \
|
||||
count) \
|
||||
count, \
|
||||
flags ) \
|
||||
do { \
|
||||
OMPI_REQUEST_INIT(&(sendreq->req_send.req_base.req_ompi), \
|
||||
persistent); \
|
||||
@ -278,7 +288,8 @@ do { \
|
||||
datatype, \
|
||||
sendmode, \
|
||||
buf, \
|
||||
count); \
|
||||
count, \
|
||||
flags ) \
|
||||
opal_convertor_get_packed_size( \
|
||||
&sendreq->req_send.req_base.req_convertor, \
|
||||
&sendreq->req_count ); \
|
||||
@ -297,7 +308,8 @@ do { \
|
||||
datatype, \
|
||||
sendmode, \
|
||||
buf, \
|
||||
count) \
|
||||
count, \
|
||||
flags ) \
|
||||
do { \
|
||||
OMPI_REQUEST_INIT(&(sendreq->req_send.req_base.req_ompi), \
|
||||
false); \
|
||||
@ -308,7 +320,8 @@ do { \
|
||||
datatype, \
|
||||
sendmode, \
|
||||
buf, \
|
||||
count); \
|
||||
count, \
|
||||
flags); \
|
||||
sendreq->req_send.req_base.req_pml_complete = false; \
|
||||
} while(0)
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -569,7 +570,9 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
|
||||
|
||||
convertor->flags |= CONVERTOR_RECV;
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
mca_cuda_convertor_init(convertor, pUserBuf);
|
||||
if (!( convertor->flags & CONVERTOR_SKIP_CUDA_INIT )) {
|
||||
mca_cuda_convertor_init(convertor, pUserBuf);
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(! (convertor->flags & CONVERTOR_SEND));
|
||||
@ -607,7 +610,9 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
|
||||
{
|
||||
convertor->flags |= CONVERTOR_SEND;
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
mca_cuda_convertor_init(convertor, pUserBuf);
|
||||
if (!( convertor->flags & CONVERTOR_SKIP_CUDA_INIT )) {
|
||||
mca_cuda_convertor_init(convertor, pUserBuf);
|
||||
}
|
||||
#endif
|
||||
|
||||
OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );
|
||||
|
@ -14,6 +14,7 @@
|
||||
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -55,6 +56,7 @@ BEGIN_C_DECLS
|
||||
#define CONVERTOR_COMPLETED 0x08000000
|
||||
#define CONVERTOR_CUDA_UNIFIED 0x10000000
|
||||
#define CONVERTOR_HAS_REMOTE_SIZE 0x20000000
|
||||
/* When set in the convertor flags, opal_convertor_prepare_for_send/recv
 * do not call mca_cuda_convertor_init() for this convertor. */
#define CONVERTOR_SKIP_CUDA_INIT 0x40000000
|
||||
|
||||
union dt_elem_desc;
|
||||
typedef struct opal_convertor_t opal_convertor_t;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user