1
1

Avoid datatype pack/unpack for contiguous data on homogeneous systems.

Signed-off-by: Jithin Jose <jithin.jose@intel.com>
Этот коммит содержится в:
Jithin Jose 2015-05-07 14:52:46 -07:00
родитель fb12572438
Коммит 56869bff38
3 изменённых файлов: 76 добавлений и 4 удалений

Просмотреть файл

@ -38,6 +38,17 @@ ompi_mtl_datatype_pack(struct opal_convertor_t *convertor,
struct iovec iov;
uint32_t iov_count = 1;
#if !(OPAL_ENABLE_HETEROGENEOUS_SUPPORT)
if (convertor->pDesc &&
opal_datatype_is_contiguous_memory_layout(convertor->pDesc,
convertor->count)) {
*freeAfter = false;
*buffer = convertor->pBaseBuf;
*buffer_len = convertor->local_size;
return OPAL_SUCCESS;
}
#endif
opal_convertor_get_packed_size(convertor, buffer_len);
*freeAfter = false;
if( 0 == *buffer_len ) {

Просмотреть файл

@ -153,11 +153,26 @@ mca_pml_cm_send(void *buf,
opal_convertor_t convertor;
ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dst);
opal_convertor_copy_and_prepare_for_send(
#if !(OPAL_ENABLE_HETEROGENEOUS_SUPPORT)
if (opal_datatype_is_contiguous_memory_layout(&datatype->super, count)) {
convertor.remoteArch = ompi_proc->super.proc_convertor->remoteArch;
convertor.flags = ompi_proc->super.proc_convertor->flags;
convertor.master = ompi_proc->super.proc_convertor->master;
convertor.local_size = count * datatype->super.size;
convertor.pBaseBuf = (unsigned char*)buf;
convertor.count = count;
convertor.pDesc = &datatype->super;
} else
#endif
{
opal_convertor_copy_and_prepare_for_send(
ompi_proc->super.proc_convertor,
&datatype->super, count, buf, 0,
&convertor);
&datatype->super, count, buf, 0,
&convertor);
}
ret = OMPI_MTL_CALL(send(ompi_mtl,
comm,
dst,

Просмотреть файл

@ -92,6 +92,7 @@ do { \
}
#if (OPAL_ENABLE_HETEROGENEOUS_SUPPORT)
#define MCA_PML_CM_SEND_REQUEST_INIT_COMMON(req_send, \
ompi_proc, \
comm, \
@ -121,6 +122,51 @@ do { \
(req_send)->req_base.req_free_called = false; \
}
#else
/*
 * Common send-request initialisation, variant compiled when
 * OPAL_ENABLE_HETEROGENEOUS_SUPPORT is not enabled.
 *
 * Retains `comm` and `datatype`, records them in the request, sets up the
 * request's convertor, and fills in the request status fields.
 *
 * Convertor setup takes one of two paths:
 *   - If opal_datatype_is_contiguous_memory_layout() reports the
 *     (datatype, count) pair as contiguous, the convertor is filled in by
 *     hand: remoteArch, flags and master are copied from the peer's
 *     proc_convertor, and local_size, pBaseBuf, count and pDesc are set
 *     directly from the arguments.
 *   - Otherwise opal_convertor_copy_and_prepare_for_send() is called.
 *
 * Parameters:
 *   req_send  pointer to the send request being initialised
 *   ompi_proc peer process; its super.proc_convertor is the template
 *   comm      communicator (retained; also supplies c_my_rank)
 *   tag       stored in req_status.MPI_TAG
 *   datatype  datatype of the user buffer (retained)
 *   sendmode  stored in req_send_mode
 *   buf       user buffer
 *   count     number of datatype elements in buf
 *
 * Every parameter is parenthesised in the expansion so that expression
 * arguments (for example `n + 1` for count or `base + off` for buf) keep
 * their intended meaning. Arguments are still evaluated more than once, so
 * callers must not pass expressions with side effects.
 *
 * NOTE(review): the contiguous path sets only the seven convertor fields
 * listed above; confirm that every consumer of req_convertor on this path
 * reads nothing else.
 * NOTE(review): pBaseBuf is set to buf with no datatype lower-bound offset
 * applied here; confirm this is correct for contiguous datatypes whose
 * data does not start at offset zero.
 */
#define MCA_PML_CM_SEND_REQUEST_INIT_COMMON(req_send,                        \
                                            ompi_proc,                       \
                                            comm,                            \
                                            tag,                             \
                                            datatype,                        \
                                            sendmode,                        \
                                            buf,                             \
                                            count)                           \
{                                                                            \
    OBJ_RETAIN(comm);                                                        \
    OBJ_RETAIN(datatype);                                                    \
    (req_send)->req_base.req_comm = (comm);                                  \
    (req_send)->req_base.req_datatype = (datatype);                          \
    if (opal_datatype_is_contiguous_memory_layout(&(datatype)->super,        \
                                                  (count))) {                \
        /* Contiguous data: populate the convertor fields by hand. */        \
        (req_send)->req_base.req_convertor.remoteArch =                      \
            (ompi_proc)->super.proc_convertor->remoteArch;                   \
        (req_send)->req_base.req_convertor.flags =                           \
            (ompi_proc)->super.proc_convertor->flags;                        \
        (req_send)->req_base.req_convertor.master =                          \
            (ompi_proc)->super.proc_convertor->master;                       \
        (req_send)->req_base.req_convertor.local_size =                      \
            (count) * (datatype)->super.size;                                \
        (req_send)->req_base.req_convertor.pBaseBuf =                        \
            (unsigned char*)(buf);                                           \
        (req_send)->req_base.req_convertor.count = (count);                  \
        (req_send)->req_base.req_convertor.pDesc = &(datatype)->super;       \
    } else {                                                                 \
        /* Non-contiguous data: clone and prepare the peer's convertor. */   \
        opal_convertor_copy_and_prepare_for_send(                            \
            (ompi_proc)->super.proc_convertor,                               \
            &((datatype)->super),                                            \
            (count),                                                         \
            (buf),                                                           \
            0,                                                               \
            &(req_send)->req_base.req_convertor );                           \
    }                                                                        \
    (req_send)->req_base.req_ompi.req_mpi_object.comm = (comm);              \
    (req_send)->req_base.req_ompi.req_status.MPI_SOURCE =                    \
        (comm)->c_my_rank;                                                   \
    (req_send)->req_base.req_ompi.req_status.MPI_TAG = (tag);                \
    (req_send)->req_base.req_ompi.req_status._ucount = (count);              \
    (req_send)->req_send_mode = (sendmode);                                  \
    (req_send)->req_base.req_free_called = false;                            \
}
#endif
#define MCA_PML_CM_HVY_SEND_REQUEST_INIT( sendreq, \
ompi_proc, \
comm, \