diff --git a/ompi/mca/pml/cm/pml_cm.h b/ompi/mca/pml/cm/pml_cm.h index 4ab5455881..0176609f80 100644 --- a/ompi/mca/pml/cm/pml_cm.h +++ b/ompi/mca/pml/cm/pml_cm.h @@ -74,7 +74,9 @@ mca_pml_cm_irecv_init(void *addr, struct ompi_request_t **request) { mca_pml_cm_hvy_recv_request_t *recvreq; +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT ompi_proc_t* ompi_proc; +#endif MCA_PML_CM_HVY_RECV_REQUEST_ALLOC(recvreq); if( OPAL_UNLIKELY(NULL == recvreq) ) return OMPI_ERR_OUT_OF_RESOURCE; @@ -98,8 +100,10 @@ mca_pml_cm_irecv(void *addr, { int ret; mca_pml_cm_thin_recv_request_t *recvreq; - ompi_proc_t* ompi_proc; - +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT + ompi_proc_t* ompi_proc = NULL; +#endif + MCA_PML_CM_THIN_RECV_REQUEST_ALLOC(recvreq); if( OPAL_UNLIKELY(NULL == recvreq) ) return OMPI_ERR_OUT_OF_RESOURCE; @@ -136,7 +140,9 @@ mca_pml_cm_recv(void *addr, ompi_status_public_t * status) { int ret; +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT ompi_proc_t *ompi_proc; +#endif opal_convertor_t convertor; mca_pml_cm_request_t req; mca_mtl_request_t *req_mtl = @@ -154,6 +160,7 @@ mca_pml_cm_recv(void *addr, req.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; req.req_ompi.req_status._cancelled = 0; +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT if( MPI_ANY_SOURCE == src ) { ompi_proc = ompi_proc_local_proc; } else { @@ -161,12 +168,22 @@ mca_pml_cm_recv(void *addr, } opal_convertor_copy_and_prepare_for_recv( - ompi_proc->super.proc_convertor, - &(datatype->super), - count, - addr, - 0, - &convertor ); + ompi_proc->super.proc_convertor, + &(datatype->super), + count, + addr, + 0, + &convertor ); +#else + opal_convertor_copy_and_prepare_for_recv( + ompi_mpi_local_convertor, + &(datatype->super), + count, + addr, + 0, + &convertor ); +#endif + ret = OMPI_MTL_CALL(irecv(ompi_mtl, comm, src, @@ -198,7 +215,9 @@ mca_pml_cm_isend_init(void* buf, ompi_request_t** request) { mca_pml_cm_hvy_send_request_t *sendreq; +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT ompi_proc_t* ompi_proc; +#endif MCA_PML_CM_HVY_SEND_REQUEST_ALLOC(sendreq, comm, dst, ompi_proc); if (OPAL_UNLIKELY(NULL == sendreq)) return OMPI_ERR_OUT_OF_RESOURCE; @@ -225,7 +244,9 @@ mca_pml_cm_isend(void* buf, if(sendmode == MCA_PML_BASE_SEND_BUFFERED ) { mca_pml_cm_hvy_send_request_t* sendreq; - ompi_proc_t* ompi_proc; +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT + ompi_proc_t* ompi_proc = NULL; +#endif MCA_PML_CM_HVY_SEND_REQUEST_ALLOC(sendreq, comm, dst, ompi_proc); if (OPAL_UNLIKELY(NULL == sendreq)) return OMPI_ERR_OUT_OF_RESOURCE; @@ -248,7 +269,9 @@ mca_pml_cm_isend(void* buf, } else { mca_pml_cm_thin_send_request_t* sendreq; - ompi_proc_t* ompi_proc; +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT + ompi_proc_t* ompi_proc = NULL; +#endif MCA_PML_CM_THIN_SEND_REQUEST_ALLOC(sendreq, comm, dst, ompi_proc); if (OPAL_UNLIKELY(NULL == sendreq)) return OMPI_ERR_OUT_OF_RESOURCE; @@ -288,10 +311,11 @@ mca_pml_cm_send(void *buf, ompi_communicator_t* comm) { int ret = OMPI_ERROR; + ompi_proc_t * ompi_proc; if(sendmode == MCA_PML_BASE_SEND_BUFFERED) { mca_pml_cm_hvy_send_request_t *sendreq; - ompi_proc_t * ompi_proc; + MCA_PML_CM_HVY_SEND_REQUEST_ALLOC(sendreq, comm, dst, ompi_proc); if (OPAL_UNLIKELY(NULL == sendreq)) return OMPI_ERR_OUT_OF_RESOURCE; @@ -315,14 +339,13 @@ mca_pml_cm_send(void *buf, ompi_request_free( (ompi_request_t**)&sendreq ); } else { opal_convertor_t convertor; - ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dst); #if !(OPAL_ENABLE_HETEROGENEOUS_SUPPORT) if (opal_datatype_is_contiguous_memory_layout(&datatype->super, count)) { - convertor.remoteArch = ompi_proc->super.proc_convertor->remoteArch; - convertor.flags = ompi_proc->super.proc_convertor->flags; - convertor.master = ompi_proc->super.proc_convertor->master; + convertor.remoteArch = ompi_mpi_local_convertor->remoteArch; + convertor.flags = ompi_mpi_local_convertor->flags; + convertor.master = ompi_mpi_local_convertor->master; convertor.local_size = count * datatype->super.size; convertor.pBaseBuf = (unsigned char*)buf; @@ -331,6 +354,7 @@ mca_pml_cm_send(void *buf, } else #endif { + ompi_proc = ompi_comm_peer_lookup(comm, dst); opal_convertor_copy_and_prepare_for_send( ompi_proc->super.proc_convertor, &datatype->super, count, buf, 0, @@ -422,9 +446,10 @@ mca_pml_cm_imrecv(void *buf, { int ret; mca_pml_cm_thin_recv_request_t *recvreq; +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT ompi_proc_t* ompi_proc; +#endif ompi_communicator_t *comm = (*message)->comm; - int peer = (*message)->peer; MCA_PML_CM_THIN_RECV_REQUEST_ALLOC(recvreq); if( OPAL_UNLIKELY(NULL == recvreq) ) return OMPI_ERR_OUT_OF_RESOURCE; @@ -432,7 +457,7 @@ mca_pml_cm_imrecv(void *buf, MCA_PML_CM_THIN_RECV_REQUEST_INIT(recvreq, ompi_proc, comm, - peer, + (*message)->peer, datatype, buf, count); @@ -453,9 +478,10 @@ mca_pml_cm_mrecv(void *buf, { int ret; mca_pml_cm_thin_recv_request_t *recvreq; +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT ompi_proc_t* ompi_proc; +#endif ompi_communicator_t *comm = (*message)->comm; - int peer = (*message)->peer; MCA_PML_CM_THIN_RECV_REQUEST_ALLOC(recvreq); if( OPAL_UNLIKELY(NULL == recvreq) ) return OMPI_ERR_OUT_OF_RESOURCE; @@ -463,7 +489,7 @@ mca_pml_cm_mrecv(void *buf, MCA_PML_CM_THIN_RECV_REQUEST_INIT(recvreq, ompi_proc, comm, - peer, + (*message)->peer, datatype, buf, count); diff --git a/ompi/mca/pml/cm/pml_cm_recvreq.h b/ompi/mca/pml/cm/pml_cm_recvreq.h index 7880974abf..1a9501eaaf 100644 --- a/ompi/mca/pml/cm/pml_cm_recvreq.h +++ b/ompi/mca/pml/cm/pml_cm_recvreq.h @@ -85,6 +85,7 @@ do { \ * @param comm (IN) Communicator. * @param persistent (IN) Is this a ersistent request. */ +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT #define MCA_PML_CM_THIN_RECV_REQUEST_INIT( request, \ ompi_proc, \ comm, \ @@ -115,7 +116,35 @@ do { \ 0, \ &(request)->req_base.req_convertor ); \ } while(0) +#else +#define MCA_PML_CM_THIN_RECV_REQUEST_INIT( request, \ + ompi_proc, \ + comm, \ + src, \ + datatype, \ + addr, \ + count ) \ +do { \ + OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, false); \ + (request)->req_base.req_ompi.req_mpi_object.comm = comm; \ + (request)->req_base.req_pml_complete = false; \ + (request)->req_base.req_free_called = false; \ + request->req_base.req_comm = comm; \ + request->req_base.req_datatype = datatype; \ + OBJ_RETAIN(comm); \ + OBJ_RETAIN(datatype); \ + \ + opal_convertor_copy_and_prepare_for_recv( \ + ompi_mpi_local_convertor, \ + &(datatype->super), \ + count, \ + addr, \ + 0, \ + &(request)->req_base.req_convertor ); \ +} while(0) +#endif +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT #define MCA_PML_CM_HVY_RECV_REQUEST_INIT( request, \ ompi_proc, \ comm, \ @@ -152,7 +181,39 @@ do { \ 0, \ &(request)->req_base.req_convertor ); \ } while(0) - +#else +#define MCA_PML_CM_HVY_RECV_REQUEST_INIT( request, \ + ompi_proc, \ + comm, \ + tag, \ + src, \ + datatype, \ + addr, \ + count, \ + persistent) \ +do { \ + OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, persistent); \ + (request)->req_base.req_ompi.req_mpi_object.comm = comm; \ + (request)->req_base.req_pml_complete = OPAL_INT_TO_BOOL(persistent); \ + (request)->req_base.req_free_called = false; \ + request->req_base.req_comm = comm; \ + request->req_base.req_datatype = datatype; \ + request->req_tag = tag; \ + request->req_peer = src; \ + request->req_addr = addr; \ + request->req_count = count; \ + OBJ_RETAIN(comm); \ + OBJ_RETAIN(datatype); \ + \ + opal_convertor_copy_and_prepare_for_recv( \ + ompi_mpi_local_convertor, \ + &(datatype->super), \ + count, \ + addr, \ + 0, \ + &(request)->req_base.req_convertor ); \ + } while(0) +#endif /** * Start an initialized request. @@ -315,7 +376,6 @@ do { \ } extern void mca_pml_cm_recv_request_completion(struct mca_mtl_request_t *mtl_request); -extern void mca_pml_cm_recv_fast_completion(struct mca_mtl_request_t *mtl_request); #endif diff --git a/ompi/mca/pml/cm/pml_cm_sendreq.h b/ompi/mca/pml/cm/pml_cm_sendreq.h index 6809b02349..bd90b1c390 100644 --- a/ompi/mca/pml/cm/pml_cm_sendreq.h +++ b/ompi/mca/pml/cm/pml_cm_sendreq.h @@ -59,6 +59,7 @@ typedef struct mca_pml_cm_hvy_send_request_t mca_pml_cm_hvy_send_request_t; OBJ_CLASS_DECLARATION(mca_pml_cm_hvy_send_request_t); +#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT #define MCA_PML_CM_THIN_SEND_REQUEST_ALLOC(sendreq, comm, dst, \ ompi_proc) \ do { \ @@ -74,8 +75,20 @@ do { \ sendreq->req_mtl.completion_callback = mca_pml_cm_send_request_completion; \ } \ } while(0) +#else +#define MCA_PML_CM_THIN_SEND_REQUEST_ALLOC(sendreq, comm, dst, \ + ompi_proc) \ +do { \ + sendreq = (mca_pml_cm_thin_send_request_t*) \ + opal_free_list_wait (&mca_pml_base_send_requests); \ + sendreq->req_send.req_base.req_pml_type = MCA_PML_CM_REQUEST_SEND_THIN; \ + sendreq->req_mtl.ompi_req = (ompi_request_t*) sendreq; \ + sendreq->req_mtl.completion_callback = mca_pml_cm_send_request_completion; \ +} while(0) +#endif +#if (OPAL_ENABLE_HETEROGENEOUS_SUPPORT) #define MCA_PML_CM_HVY_SEND_REQUEST_ALLOC(sendreq, comm, dst, \ ompi_proc) \ { \ @@ -90,7 +103,17 @@ do { \ sendreq->req_mtl.completion_callback = mca_pml_cm_send_request_completion; \ } \ } - +#else +#define MCA_PML_CM_HVY_SEND_REQUEST_ALLOC(sendreq, comm, dst, \ + ompi_proc) \ +{ \ + sendreq = (mca_pml_cm_hvy_send_request_t*) \ + opal_free_list_wait (&mca_pml_base_send_requests); \ + sendreq->req_send.req_base.req_pml_type = MCA_PML_CM_REQUEST_SEND_HEAVY; \ + sendreq->req_mtl.ompi_req = (ompi_request_t*) sendreq; \ + sendreq->req_mtl.completion_callback = mca_pml_cm_send_request_completion; \ +} +#endif #if (OPAL_ENABLE_HETEROGENEOUS_SUPPORT) #define MCA_PML_CM_SEND_REQUEST_INIT_COMMON(req_send, \ @@ -138,11 +161,11 @@ do { \ (req_send)->req_base.req_datatype = datatype; \ if (opal_datatype_is_contiguous_memory_layout(&datatype->super, count)) { \ (req_send)->req_base.req_convertor.remoteArch = \ - ompi_proc->super.proc_convertor->remoteArch; \ + ompi_mpi_local_convertor->remoteArch; \ (req_send)->req_base.req_convertor.flags = \ - ompi_proc->super.proc_convertor->flags; \ + ompi_mpi_local_convertor->flags; \ (req_send)->req_base.req_convertor.master = \ - ompi_proc->super.proc_convertor->master; \ + ompi_mpi_local_convertor->master; \ (req_send)->req_base.req_convertor.local_size = \ count * datatype->super.size; \ (req_send)->req_base.req_convertor.pBaseBuf = (unsigned char*)buf; \ @@ -150,7 +173,7 @@ do { \ (req_send)->req_base.req_convertor.pDesc = &datatype->super; \ } else { \ opal_convertor_copy_and_prepare_for_send( \ - ompi_proc->super.proc_convertor, \ + ompi_mpi_local_convertor, \ &(datatype->super), \ count, \ buf, \