From 7ccde09a093b807e17ba9e56085efae4161df495 Mon Sep 17 00:00:00 2001 From: Jithin Jose Date: Wed, 10 Jun 2015 17:12:26 -0700 Subject: [PATCH] Do opal_convertor_copy_and_prepare_for_send for buffered send mode as MCA_PML_CM_HVY_SEND_REQUEST_BSEND_ALLOC calls opal_convertor_pack directly. Signed-off-by: Jithin Jose --- /* Review notes. Hunk in mtl_base_datatype.h: the branch guarded by opal_datatype_is_contiguous_memory_layout() is now also skipped when convertor->flags has CONVERTOR_COMPLETED set. Hunks in pml_cm_sendreq.h: add MCA_PML_CM_HVY_SEND_REQUEST_INIT_COMMON in two build variants and switch the one call site shown (on sendreq->req_send) from MCA_PML_CM_SEND_REQUEST_INIT_COMMON to it. Hunks in opal_convertor.c: the assignments to convertor->pDesc and convertor->count are removed from before the (0 == count) || (0 == datatype->size) check and re-added further down. NOTE(review): the lines between the two opal_convertor.c hunks are not shown here; if the empty-data branch leaves the macro early, pDesc and count stay unassigned on that path - confirm that this is intended. */ ompi/mca/mtl/base/mtl_base_datatype.h | 1 + ompi/mca/pml/cm/pml_cm_sendreq.h | 62 ++++++++++++++++++++++++++- opal/datatype/opal_convertor.c | 4 +- 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/ompi/mca/mtl/base/mtl_base_datatype.h b/ompi/mca/mtl/base/mtl_base_datatype.h index 33d9126929..0d515e2b8a 100644 --- a/ompi/mca/mtl/base/mtl_base_datatype.h +++ b/ompi/mca/mtl/base/mtl_base_datatype.h @@ -40,6 +40,7 @@ ompi_mtl_datatype_pack(struct opal_convertor_t *convertor, #if !(OPAL_ENABLE_HETEROGENEOUS_SUPPORT) if (convertor->pDesc && + !(convertor->flags & CONVERTOR_COMPLETED) && opal_datatype_is_contiguous_memory_layout(convertor->pDesc, convertor->count)) { *freeAfter = false; diff --git a/ompi/mca/pml/cm/pml_cm_sendreq.h b/ompi/mca/pml/cm/pml_cm_sendreq.h index 09fc61b817..d699c2a25b 100644 --- a/ompi/mca/pml/cm/pml_cm_sendreq.h +++ b/ompi/mca/pml/cm/pml_cm_sendreq.h @@ -115,6 +115,66 @@ do { \ } #endif +#if (OPAL_ENABLE_HETEROGENEOUS_SUPPORT) /* Heterogeneous build: the convertor is copied from ompi_proc->super.proc_convertor. */ +#define MCA_PML_CM_HVY_SEND_REQUEST_INIT_COMMON(req_send, \ + ompi_proc, \ + comm, \ + tag, \ + datatype, \ + sendmode, \ + buf, \ + count) \ +{ /* NOTE(review): the macro expands to a bare { } block rather than do { } while (0), and every argument except req_send is used unparenthesized. */ \ + OBJ_RETAIN(comm); \ + OBJ_RETAIN(datatype); \ + (req_send)->req_base.req_comm = comm; \ + (req_send)->req_base.req_datatype = datatype; \ + opal_convertor_copy_and_prepare_for_send( \ + ompi_proc->super.proc_convertor, \ + &(datatype->super), \ + count, \ + buf, \ + 0, \ + &(req_send)->req_base.req_convertor ); \ + (req_send)->req_base.req_ompi.req_mpi_object.comm = comm; \ + (req_send)->req_base.req_ompi.req_status.MPI_SOURCE = \ + comm->c_my_rank; \ + (req_send)->req_base.req_ompi.req_status.MPI_TAG = tag; \ + 
(req_send)->req_base.req_ompi.req_status._ucount = count; \ + (req_send)->req_send_mode = sendmode; \ + (req_send)->req_base.req_free_called = false; \ +} +#else /* Otherwise the convertor is copied from ompi_mpi_local_convertor; the rest of the macro body matches the branch above. */ +#define MCA_PML_CM_HVY_SEND_REQUEST_INIT_COMMON(req_send, \ + ompi_proc, \ + comm, \ + tag, \ + datatype, \ + sendmode, \ + buf, \ + count) \ +{ \ + OBJ_RETAIN(comm); \ + OBJ_RETAIN(datatype); \ + (req_send)->req_base.req_comm = comm; \ + (req_send)->req_base.req_datatype = datatype; \ + opal_convertor_copy_and_prepare_for_send( \ + ompi_mpi_local_convertor, \ + &(datatype->super), \ + count, \ + buf, \ + 0, \ + &(req_send)->req_base.req_convertor ); \ + (req_send)->req_base.req_ompi.req_mpi_object.comm = comm; \ + (req_send)->req_base.req_ompi.req_status.MPI_SOURCE = \ + comm->c_my_rank; \ + (req_send)->req_base.req_ompi.req_status.MPI_TAG = tag; \ + (req_send)->req_base.req_ompi.req_status._ucount = count; \ + (req_send)->req_send_mode = sendmode; \ + (req_send)->req_base.req_free_called = false; \ +} +#endif + #if (OPAL_ENABLE_HETEROGENEOUS_SUPPORT) #define MCA_PML_CM_SEND_REQUEST_INIT_COMMON(req_send, \ ompi_proc, \ @@ -209,7 +269,7 @@ do { \ sendreq->req_peer = dst; \ sendreq->req_addr = buf; \ sendreq->req_count = count; \ - MCA_PML_CM_SEND_REQUEST_INIT_COMMON( (&sendreq->req_send), \ + MCA_PML_CM_HVY_SEND_REQUEST_INIT_COMMON( (&sendreq->req_send), \ ompi_proc, \ comm, \ tag, \ diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 64e50ea5cb..9a211ba845 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -495,8 +495,6 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, * completed. With this flag set the pack and unpack functions \ * will not do anything. 
\ */ \ - convertor->pDesc = (opal_datatype_t*)datatype; \ - convertor->count = count; \ if( OPAL_UNLIKELY((0 == count) || (0 == datatype->size)) ) { \ convertor->flags |= OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED; \ convertor->local_size = convertor->remote_size = 0; \ @@ -505,11 +503,13 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, /* Compute the local in advance */ \ convertor->local_size = count * datatype->size; \ convertor->pBaseBuf = (unsigned char*)pUserBuf; \ + convertor->count = count; \ \ /* Grab the datatype part of the flags */ \ convertor->flags &= CONVERTOR_TYPE_MASK; \ convertor->flags |= (CONVERTOR_DATATYPE_MASK & datatype->flags); \ convertor->flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS); \ + convertor->pDesc = (opal_datatype_t*)datatype; \ convertor->bConverted = 0; \ /* By default consider the optimized description */ \ convertor->use_desc = &(datatype->opt_desc); \