1
1

Add more optimizations for the case where heterogeneous support is not enabled.

This commit was SVN r18932.
Этот коммит содержится в:
George Bosilca 2008-07-17 04:54:47 +00:00
родитель 939fa3001d
Коммит cb66115512

Просмотреть файл

@ -417,55 +417,12 @@ int32_t ompi_convertor_set_position_nocheck( ompi_convertor_t* convertor,
}
/**
* This macro will initialize a convertor based on a previously created
* convertor. The idea is the move outside these function the heavy
* selection of architecture features for the convertors. I consider
* here that the convertor is clean, either never initialized or already
* cleaned.
* Compute the remote size.
*/
#define OMPI_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ) \
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
#define OMPI_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \
{ \
uint64_t bdt_mask; \
\
bdt_mask = datatype->bdt_used & convertor->master->hetero_mask; \
/* Compute the local in advance */ \
convertor->local_size = count * datatype->size; \
convertor->pBaseBuf = (unsigned char*)pUserBuf; \
convertor->count = count; \
\
/* Grab the datatype part of the flags */ \
convertor->flags &= CONVERTOR_TYPE_MASK; \
convertor->flags |= (CONVERTOR_DATATYPE_MASK & datatype->flags); \
convertor->flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS); \
convertor->pDesc = (ompi_datatype_t*)datatype; \
convertor->bConverted = 0; \
/* By default consider the optimized description */ \
convertor->use_desc = &(datatype->opt_desc); \
\
/* If the data is empty we just mark the convertor as \
* completed. With this flag set the pack and unpack functions \
* will not do anything. \
*/ \
if( OPAL_UNLIKELY((0 == count) || (0 == datatype->size)) ) { \
convertor->flags |= CONVERTOR_COMPLETED; \
convertor->remote_size = 0; \
return OMPI_SUCCESS; \
} \
\
if( OPAL_LIKELY(convertor->remoteArch == ompi_mpi_local_arch) ) { \
convertor->remote_size = convertor->local_size; \
if( (convertor->flags & (CONVERTOR_WITH_CHECKSUM | DT_FLAG_NO_GAPS)) == DT_FLAG_NO_GAPS ) { \
return OMPI_SUCCESS; \
} \
if( ((convertor->flags & (CONVERTOR_WITH_CHECKSUM | DT_FLAG_CONTIGUOUS)) \
== DT_FLAG_CONTIGUOUS) && (1 == count) ) { \
return OMPI_SUCCESS; \
} \
} \
\
if( OPAL_LIKELY(0 == bdt_mask) ) { \
convertor->remote_size = convertor->local_size; \
} else { \
if( OPAL_UNLIKELY(0 != bdt_mask) ) { \
ompi_convertor_master_t* master; \
int i; \
convertor->flags ^= CONVERTOR_HOMOGENEOUS; \
@ -481,6 +438,59 @@ int32_t ompi_convertor_set_position_nocheck( ompi_convertor_t* convertor,
convertor->remote_size *= convertor->count; \
convertor->use_desc = &(datatype->desc); \
} \
}
#else
/* No-op variant: when OMPI_ENABLE_HETEROGENEOUS_SUPPORT is disabled this macro expands to nothing, so the caller keeps whatever remote_size it set beforehand. */
#define OMPI_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask)
#endif /* OMPI_ENABLE_HETEROGENEOUS_SUPPORT */
/**
* This macro will initialize a convertor based on a previously created
* convertor. The idea is to move the heavy selection of architecture
* features for the convertors outside of these functions. The convertor
* is assumed to be clean here: either never initialized or already
* cleaned.
*/
#define OMPI_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ) \
{ \
uint64_t bdt_mask; \
\
/* If the data is empty we just mark the convertor as \
* completed. With this flag set the pack and unpack functions \
* will not do anything. \
*/ \
if( OPAL_UNLIKELY((0 == count) || (0 == datatype->size)) ) { \
convertor->flags |= CONVERTOR_COMPLETED; \
convertor->local_size = convertor->remote_size = 0; \
return OMPI_SUCCESS; \
} \
bdt_mask = datatype->bdt_used & convertor->master->hetero_mask; \
/* Compute the local in advance */ \
convertor->local_size = count * datatype->size; \
convertor->pBaseBuf = (unsigned char*)pUserBuf; \
convertor->count = count; \
\
/* Grab the datatype part of the flags */ \
convertor->flags &= CONVERTOR_TYPE_MASK; \
convertor->flags |= (CONVERTOR_DATATYPE_MASK & datatype->flags); \
convertor->flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS); \
convertor->pDesc = (ompi_datatype_t*)datatype; \
convertor->bConverted = 0; \
/* By default consider the optimized description */ \
convertor->use_desc = &(datatype->opt_desc); \
\
convertor->remote_size = convertor->local_size; \
if( OPAL_LIKELY(convertor->remoteArch == ompi_mpi_local_arch) ) { \
if( (convertor->flags & (CONVERTOR_WITH_CHECKSUM | DT_FLAG_NO_GAPS)) == DT_FLAG_NO_GAPS ) { \
return OMPI_SUCCESS; \
} \
if( ((convertor->flags & (CONVERTOR_WITH_CHECKSUM | DT_FLAG_CONTIGUOUS)) \
== DT_FLAG_CONTIGUOUS) && (1 == count) ) { \
return OMPI_SUCCESS; \
} \
} \
\
OMPI_CONVERTOR_COMPUTE_REMOTE_SIZE( convertor, datatype, \
bdt_mask ); \
assert( NULL != convertor->use_desc->desc ); \
/* For predefined datatypes (contiguous) do nothing more */ \
/* if checksum is enabled then always continue */ \