1
1

Merge pull request #4143 from aravindksg/psm2_cuda

Add support for GPU buffers for PSM2 MTL
Этот коммит содержится в:
Ralph Castain 2017-09-01 21:09:55 -07:00 коммит произвёл GitHub
родитель 7b22207599 2e83cf15ce
Коммит c1ce233eaf
8 изменённых файлов: 116 добавлений и 32 удалений

Просмотреть файл

@ -5,6 +5,7 @@
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -61,6 +62,9 @@ typedef struct mca_mtl_request_t mca_mtl_request_t;
* MTL module flags
*/
#define MCA_MTL_BASE_FLAG_REQUIRE_WORLD 0x00000001
#if OPAL_CUDA_SUPPORT
/* MTL module flag, only defined in CUDA-enabled builds. When an MTL sets this
 * bit in its mtl_flags, MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF adds
 * CONVERTOR_SKIP_CUDA_INIT to the convertor flags for contiguous layouts. */
#define MCA_MTL_BASE_FLAG_CUDA_INIT_DISABLE 0x00000002
#endif
/**
* Initialization routine for MTL component

Просмотреть файл

@ -11,7 +11,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Research Organization for Information Science
@ -100,6 +100,9 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
char *generated_key;
char env_string[256];
int rc;
#if OPAL_CUDA_SUPPORT
char *cuda_env;
#endif
generated_key = getenv(OPAL_MCA_PREFIX"orte_precondition_transports");
memset(uu, 0, sizeof(psm2_uuid_t));
@ -173,6 +176,15 @@ int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
/* register the psm2 progress function */
opal_progress_register(ompi_mtl_psm2_progress);
#if OPAL_CUDA_SUPPORT
ompi_mtl_psm2.super.mtl_flags |= MCA_MTL_BASE_FLAG_CUDA_INIT_DISABLE;
cuda_env = getenv("PSM2_CUDA");
if (!cuda_env || ( strcmp(cuda_env, "0") == 0) )
opal_output(0, "Warning: If running with device buffers, there is a"
" chance the application might fail. Try setting PSM2_CUDA=1.\n");
#endif
return OMPI_SUCCESS;
}

Просмотреть файл

@ -6,6 +6,7 @@
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -79,6 +80,7 @@ mca_pml_cm_irecv_init(void *addr,
struct ompi_request_t **request)
{
mca_pml_cm_hvy_recv_request_t *recvreq;
uint32_t flags = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
ompi_proc_t* ompi_proc;
#endif
@ -87,7 +89,7 @@ mca_pml_cm_irecv_init(void *addr,
if( OPAL_UNLIKELY(NULL == recvreq) ) return OMPI_ERR_OUT_OF_RESOURCE;
MCA_PML_CM_HVY_RECV_REQUEST_INIT(recvreq, ompi_proc, comm, tag, src,
datatype, addr, count, true);
datatype, addr, count, flags, true);
*request = (ompi_request_t*) recvreq;
@ -104,6 +106,7 @@ mca_pml_cm_irecv(void *addr,
struct ompi_request_t **request)
{
int ret;
uint32_t flags = 0;
mca_pml_cm_thin_recv_request_t *recvreq;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
ompi_proc_t* ompi_proc = NULL;
@ -118,7 +121,8 @@ mca_pml_cm_irecv(void *addr,
src,
datatype,
addr,
count);
count,
flags);
MCA_PML_CM_THIN_RECV_REQUEST_START(recvreq, comm, tag, src, ret);
@ -145,6 +149,7 @@ mca_pml_cm_recv(void *addr,
ompi_status_public_t * status)
{
int ret;
uint32_t flags = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
ompi_proc_t *ompi_proc;
#endif
@ -173,20 +178,24 @@ mca_pml_cm_recv(void *addr,
ompi_proc = ompi_comm_peer_lookup( comm, src );
}
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count);
opal_convertor_copy_and_prepare_for_recv(
ompi_proc->super.proc_convertor,
&(datatype->super),
count,
addr,
0,
flags,
&convertor );
#else
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count);
opal_convertor_copy_and_prepare_for_recv(
ompi_mpi_local_convertor,
&(datatype->super),
count,
addr,
0,
flags,
&convertor );
#endif
@ -222,6 +231,7 @@ mca_pml_cm_isend_init(const void* buf,
ompi_request_t** request)
{
mca_pml_cm_hvy_send_request_t *sendreq;
uint32_t flags = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
ompi_proc_t* ompi_proc;
#endif
@ -230,7 +240,7 @@ mca_pml_cm_isend_init(const void* buf,
if (OPAL_UNLIKELY(NULL == sendreq)) return OMPI_ERR_OUT_OF_RESOURCE;
MCA_PML_CM_HVY_SEND_REQUEST_INIT(sendreq, ompi_proc, comm, tag, dst,
datatype, sendmode, true, false, buf, count);
datatype, sendmode, true, false, buf, count, flags);
/* Work around a leak in start by marking this request as complete. The
* problem occurred because we do not have a way to differentiate an
@ -254,6 +264,7 @@ mca_pml_cm_isend(const void* buf,
ompi_request_t** request)
{
int ret;
uint32_t flags = 0;
if(sendmode == MCA_PML_BASE_SEND_BUFFERED ) {
mca_pml_cm_hvy_send_request_t* sendreq;
@ -274,7 +285,8 @@ mca_pml_cm_isend(const void* buf,
false,
false,
buf,
count);
count,
flags);
MCA_PML_CM_HVY_SEND_REQUEST_START( sendreq, ret);
@ -296,7 +308,8 @@ mca_pml_cm_isend(const void* buf,
datatype,
sendmode,
buf,
count);
count,
flags);
MCA_PML_CM_THIN_SEND_REQUEST_START(
sendreq,
@ -324,6 +337,7 @@ mca_pml_cm_send(const void *buf,
ompi_communicator_t* comm)
{
int ret = OMPI_ERROR;
uint32_t flags = 0;
ompi_proc_t * ompi_proc;
if(sendmode == MCA_PML_BASE_SEND_BUFFERED) {
@ -342,7 +356,8 @@ mca_pml_cm_send(const void *buf,
false,
false,
buf,
count);
count,
flags);
MCA_PML_CM_HVY_SEND_REQUEST_START(sendreq, ret);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
MCA_PML_CM_HVY_SEND_REQUEST_RETURN(sendreq);
@ -368,9 +383,12 @@ mca_pml_cm_send(const void *buf,
#endif
{
ompi_proc = ompi_comm_peer_lookup(comm, dst);
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count);
opal_convertor_copy_and_prepare_for_send(
ompi_proc->super.proc_convertor,
&datatype->super, count, buf, 0,
&datatype->super, count, buf, flags,
&convertor);
}
@ -459,6 +477,7 @@ mca_pml_cm_imrecv(void *buf,
struct ompi_request_t **request)
{
int ret;
uint32_t flags = 0;
mca_pml_cm_thin_recv_request_t *recvreq;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
ompi_proc_t* ompi_proc;
@ -474,7 +493,8 @@ mca_pml_cm_imrecv(void *buf,
(*message)->peer,
datatype,
buf,
count);
count,
flags);
MCA_PML_CM_THIN_RECV_REQUEST_MATCHED_START(recvreq, message, ret);
@ -491,6 +511,7 @@ mca_pml_cm_mrecv(void *buf,
ompi_status_public_t* status)
{
int ret;
uint32_t flags = 0;
mca_pml_cm_thin_recv_request_t *recvreq;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
ompi_proc_t* ompi_proc;
@ -506,7 +527,8 @@ mca_pml_cm_mrecv(void *buf,
(*message)->peer,
datatype,
buf,
count);
count,
flags);
MCA_PML_CM_THIN_RECV_REQUEST_MATCHED_START(recvreq,
message, ret);

Просмотреть файл

@ -13,6 +13,7 @@
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -92,7 +93,8 @@ do { \
src, \
datatype, \
addr, \
count ) \
count, \
flags ) \
do { \
OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, false); \
(request)->req_base.req_ompi.req_mpi_object.comm = comm; \
@ -108,12 +110,13 @@ do { \
} else { \
ompi_proc = ompi_comm_peer_lookup( comm, src ); \
} \
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
opal_convertor_copy_and_prepare_for_recv( \
ompi_proc->super.proc_convertor, \
&(datatype->super), \
count, \
addr, \
0, \
flags, \
&(request)->req_base.req_convertor ); \
} while(0)
#else
@ -123,7 +126,8 @@ do { \
src, \
datatype, \
addr, \
count ) \
count, \
flags ) \
do { \
OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, false); \
(request)->req_base.req_ompi.req_mpi_object.comm = comm; \
@ -134,12 +138,13 @@ do { \
OBJ_RETAIN(comm); \
OMPI_DATATYPE_RETAIN(datatype); \
\
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
opal_convertor_copy_and_prepare_for_recv( \
ompi_mpi_local_convertor, \
&(datatype->super), \
count, \
addr, \
0, \
flags, \
&(request)->req_base.req_convertor ); \
} while(0)
#endif
@ -153,6 +158,7 @@ do { \
datatype, \
addr, \
count, \
flags, \
persistent) \
do { \
OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, persistent); \
@ -173,12 +179,13 @@ do { \
} else { \
ompi_proc = ompi_comm_peer_lookup( comm, src ); \
} \
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
opal_convertor_copy_and_prepare_for_recv( \
ompi_proc->super.proc_convertor, \
&(datatype->super), \
count, \
addr, \
0, \
flags, \
&(request)->req_base.req_convertor ); \
} while(0)
#else
@ -190,6 +197,7 @@ do { \
datatype, \
addr, \
count, \
flags, \
persistent) \
do { \
OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, persistent); \
@ -205,12 +213,13 @@ do { \
OBJ_RETAIN(comm); \
OMPI_DATATYPE_RETAIN(datatype); \
\
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
opal_convertor_copy_and_prepare_for_recv( \
ompi_mpi_local_convertor, \
&(datatype->super), \
count, \
addr, \
0, \
flags, \
&(request)->req_base.req_convertor ); \
} while(0)
#endif

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -53,4 +54,20 @@ struct mca_pml_cm_request_t {
typedef struct mca_pml_cm_request_t mca_pml_cm_request_t;
OBJ_CLASS_DECLARATION(mca_pml_cm_request_t);
/*
 * Avoid CUDA convertor inits only for contiguous memory and if indicated by
 * the MTL. For non-contiguous memory, do not skip CUDA convertor init phases.
 *
 * flags    - modifiable lvalue bitmask; CONVERTOR_SKIP_CUDA_INIT is OR-ed in
 *            when both conditions hold, otherwise it is left untouched.
 * datatype - pointer to a datatype object; its ->super member is passed to
 *            opal_datatype_is_contiguous_memory_layout().
 * count    - element count passed to the contiguity check.
 *
 * Both variants expand to a single do { } while (0) statement, so an
 * invocation followed by ';' is safe as the body of an unbraced if/else.
 * Macro arguments are parenthesized to keep operator precedence intact.
 * Without OPAL_CUDA_SUPPORT the macro is a no-op and evaluates no argument.
 */
#if OPAL_CUDA_SUPPORT
#define MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count) \
do { \
if (opal_datatype_is_contiguous_memory_layout(&(datatype)->super, (count)) \
&& (ompi_mtl->mtl_flags & MCA_MTL_BASE_FLAG_CUDA_INIT_DISABLE)) { \
(flags) |= CONVERTOR_SKIP_CUDA_INIT; \
} \
} while (0)
#else
#define MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count) \
do { } while (0)
#endif
#endif

Просмотреть файл

@ -14,6 +14,7 @@
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -125,18 +126,20 @@ do { \
datatype, \
sendmode, \
buf, \
count) \
count, \
flags ) \
{ \
OBJ_RETAIN(comm); \
OMPI_DATATYPE_RETAIN(datatype); \
(req_send)->req_base.req_comm = comm; \
(req_send)->req_base.req_datatype = datatype; \
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
opal_convertor_copy_and_prepare_for_send( \
ompi_proc->super.proc_convertor, \
&(datatype->super), \
count, \
buf, \
0, \
flags, \
&(req_send)->req_base.req_convertor ); \
(req_send)->req_base.req_ompi.req_mpi_object.comm = comm; \
(req_send)->req_base.req_ompi.req_status.MPI_SOURCE = \
@ -154,18 +157,20 @@ do { \
datatype, \
sendmode, \
buf, \
count) \
count, \
flags ) \
{ \
OBJ_RETAIN(comm); \
OMPI_DATATYPE_RETAIN(datatype); \
(req_send)->req_base.req_comm = comm; \
(req_send)->req_base.req_datatype = datatype; \
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
opal_convertor_copy_and_prepare_for_send( \
ompi_mpi_local_convertor, \
&(datatype->super), \
count, \
buf, \
0, \
flags, \
&(req_send)->req_base.req_convertor ); \
(req_send)->req_base.req_ompi.req_mpi_object.comm = comm; \
(req_send)->req_base.req_ompi.req_status.MPI_SOURCE = \
@ -185,18 +190,20 @@ do { \
datatype, \
sendmode, \
buf, \
count) \
count, \
flags ) \
{ \
OBJ_RETAIN(comm); \
OMPI_DATATYPE_RETAIN(datatype); \
(req_send)->req_base.req_comm = comm; \
(req_send)->req_base.req_datatype = datatype; \
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
opal_convertor_copy_and_prepare_for_send( \
ompi_proc->super.proc_convertor, \
&(datatype->super), \
count, \
buf, \
0, \
flags, \
&(req_send)->req_base.req_convertor ); \
(req_send)->req_base.req_ompi.req_mpi_object.comm = comm; \
(req_send)->req_base.req_ompi.req_status.MPI_SOURCE = \
@ -215,7 +222,8 @@ do { \
datatype, \
sendmode, \
buf, \
count) \
count, \
flags ) \
{ \
OBJ_RETAIN(comm); \
OMPI_DATATYPE_RETAIN(datatype); \
@ -235,12 +243,13 @@ do { \
(req_send)->req_base.req_convertor.count = count; \
(req_send)->req_base.req_convertor.pDesc = &datatype->super; \
} else { \
MCA_PML_CM_SWITCH_CUDA_CONVERTOR_OFF(flags, datatype, count); \
opal_convertor_copy_and_prepare_for_send( \
ompi_mpi_local_convertor, \
&(datatype->super), \
count, \
buf, \
0, \
flags, \
&(req_send)->req_base.req_convertor ); \
} \
(req_send)->req_base.req_ompi.req_mpi_object.comm = comm; \
@ -263,7 +272,8 @@ do { \
persistent, \
blocking, \
buf, \
count) \
count, \
flags ) \
do { \
OMPI_REQUEST_INIT(&(sendreq->req_send.req_base.req_ompi), \
persistent); \
@ -278,7 +288,8 @@ do { \
datatype, \
sendmode, \
buf, \
count); \
count, \
flags ) \
opal_convertor_get_packed_size( \
&sendreq->req_send.req_base.req_convertor, \
&sendreq->req_count ); \
@ -297,7 +308,8 @@ do { \
datatype, \
sendmode, \
buf, \
count) \
count, \
flags ) \
do { \
OMPI_REQUEST_INIT(&(sendreq->req_send.req_base.req_ompi), \
false); \
@ -308,7 +320,8 @@ do { \
datatype, \
sendmode, \
buf, \
count); \
count, \
flags); \
sendreq->req_send.req_base.req_pml_complete = false; \
} while(0)

Просмотреть файл

@ -14,6 +14,7 @@
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2013-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -569,7 +570,9 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
convertor->flags |= CONVERTOR_RECV;
#if OPAL_CUDA_SUPPORT
mca_cuda_convertor_init(convertor, pUserBuf);
if (!( convertor->flags & CONVERTOR_SKIP_CUDA_INIT )) {
mca_cuda_convertor_init(convertor, pUserBuf);
}
#endif
assert(! (convertor->flags & CONVERTOR_SEND));
@ -607,7 +610,9 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
{
convertor->flags |= CONVERTOR_SEND;
#if OPAL_CUDA_SUPPORT
mca_cuda_convertor_init(convertor, pUserBuf);
if (!( convertor->flags & CONVERTOR_SKIP_CUDA_INIT )) {
mca_cuda_convertor_init(convertor, pUserBuf);
}
#endif
OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );

Просмотреть файл

@ -14,6 +14,7 @@
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -55,6 +56,7 @@ BEGIN_C_DECLS
#define CONVERTOR_COMPLETED 0x08000000
#define CONVERTOR_CUDA_UNIFIED 0x10000000
#define CONVERTOR_HAS_REMOTE_SIZE 0x20000000
#define CONVERTOR_SKIP_CUDA_INIT 0x40000000
union dt_elem_desc;
typedef struct opal_convertor_t opal_convertor_t;