Optimize the execution path.
This commit was SVN r9763.
Этот коммит содержится в:
родитель
faba013106
Коммит
0fd4e2d906
@@ -469,11 +469,11 @@ ompi_convertor_prepare_for_send( ompi_convertor_t* convertor,
|
||||
if( datatype->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
assert( convertor->flags & DT_FLAG_CONTIGUOUS );
|
||||
if( ((datatype->ub - datatype->lb) == (long)datatype->size) )
|
||||
convertor->fAdvance = ompi_pack_no_conv_contig_checksum;
|
||||
convertor->fAdvance = ompi_pack_homogeneous_contig_checksum;
|
||||
else if( 1 >= convertor->count ) /* gaps or no gaps */
|
||||
convertor->fAdvance = ompi_pack_no_conv_contig_checksum;
|
||||
convertor->fAdvance = ompi_pack_homogeneous_contig_checksum;
|
||||
else
|
||||
convertor->fAdvance = ompi_pack_no_conv_contig_with_gaps_checksum;
|
||||
convertor->fAdvance = ompi_pack_homogeneous_contig_with_gaps_checksum;
|
||||
} else {
|
||||
convertor->fAdvance = ompi_generic_simple_pack_checksum;
|
||||
}
|
||||
@@ -481,11 +481,11 @@ ompi_convertor_prepare_for_send( ompi_convertor_t* convertor,
|
||||
if( datatype->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
assert( convertor->flags & DT_FLAG_CONTIGUOUS );
|
||||
if( ((datatype->ub - datatype->lb) == (long)datatype->size) )
|
||||
convertor->fAdvance = ompi_pack_no_conv_contig;
|
||||
convertor->fAdvance = ompi_pack_homogeneous_contig;
|
||||
else if( 1 >= convertor->count ) /* gaps or no gaps */
|
||||
convertor->fAdvance = ompi_pack_no_conv_contig;
|
||||
convertor->fAdvance = ompi_pack_homogeneous_contig;
|
||||
else
|
||||
convertor->fAdvance = ompi_pack_no_conv_contig_with_gaps;
|
||||
convertor->fAdvance = ompi_pack_homogeneous_contig_with_gaps;
|
||||
} else {
|
||||
convertor->fAdvance = ompi_generic_simple_pack;
|
||||
}
|
||||
|
@@ -37,15 +37,15 @@ extern int ompi_pack_debug;
|
||||
#define ompi_pack_general_function ompi_pack_general_checksum
|
||||
#define ompi_pack_homogeneous_with_memcpy_function ompi_pack_homogeneous_with_memcpy_checksum
|
||||
#define ompi_pack_no_conversion_function ompi_pack_no_conversion_checksum
|
||||
#define ompi_pack_no_conv_contig_function ompi_pack_no_conv_contig_checksum
|
||||
#define ompi_pack_no_conv_contig_with_gaps_function ompi_pack_no_conv_contig_with_gaps_checksum
|
||||
#define ompi_pack_homogeneous_contig_function ompi_pack_homogeneous_contig_checksum
|
||||
#define ompi_pack_homogeneous_contig_with_gaps_function ompi_pack_homogeneous_contig_with_gaps_checksum
|
||||
#define ompi_generic_simple_pack_function ompi_generic_simple_pack_checksum
|
||||
#else
|
||||
#define ompi_pack_general_function ompi_pack_general
|
||||
#define ompi_pack_homogeneous_with_memcpy_function ompi_pack_homogeneous_with_memcpy
|
||||
#define ompi_pack_no_conversion_function ompi_pack_no_conversion
|
||||
#define ompi_pack_no_conv_contig_function ompi_pack_no_conv_contig
|
||||
#define ompi_pack_no_conv_contig_with_gaps_function ompi_pack_no_conv_contig_with_gaps
|
||||
#define ompi_pack_homogeneous_contig_function ompi_pack_homogeneous_contig
|
||||
#define ompi_pack_homogeneous_contig_with_gaps_function ompi_pack_homogeneous_contig_with_gaps
|
||||
#define ompi_generic_simple_pack_function ompi_generic_simple_pack
|
||||
#endif /* defined(CHECKSUM) */
|
||||
|
||||
@@ -602,11 +602,11 @@ ompi_pack_no_conversion_function( ompi_convertor_t* pConv,
|
||||
* the status with just the informations from pConvertor->bConverted.
|
||||
*/
|
||||
int32_t
|
||||
ompi_pack_no_conv_contig_function( ompi_convertor_t* pConv,
|
||||
struct iovec* iov,
|
||||
uint32_t* out_size,
|
||||
size_t* max_data,
|
||||
int* freeAfter )
|
||||
ompi_pack_homogeneous_contig_function( ompi_convertor_t* pConv,
|
||||
struct iovec* iov,
|
||||
uint32_t* out_size,
|
||||
size_t* max_data,
|
||||
int* freeAfter )
|
||||
{
|
||||
dt_stack_t* pStack = pConv->pStack;
|
||||
char *source_base = NULL;
|
||||
@@ -649,11 +649,11 @@ ompi_pack_no_conv_contig_function( ompi_convertor_t* pConv,
|
||||
}
|
||||
|
||||
int32_t
|
||||
ompi_pack_no_conv_contig_with_gaps_function( ompi_convertor_t* pConv,
|
||||
struct iovec* iov,
|
||||
uint32_t* out_size,
|
||||
size_t* max_data,
|
||||
int* freeAfter )
|
||||
ompi_pack_homogeneous_contig_with_gaps_function( ompi_convertor_t* pConv,
|
||||
struct iovec* iov,
|
||||
uint32_t* out_size,
|
||||
size_t* max_data,
|
||||
int* freeAfter )
|
||||
{
|
||||
const ompi_datatype_t* pData = pConv->pDesc;
|
||||
dt_stack_t* pStack = pConv->pStack;
|
||||
|
@@ -40,19 +40,19 @@ ompi_pack_no_conversion_checksum( ompi_convertor_t* pConv,
|
||||
struct iovec* iov, uint32_t *out_size,
|
||||
size_t* max_data, int32_t* freeAfter );
|
||||
OMPI_DECLSPEC int32_t
|
||||
ompi_pack_no_conv_contig( ompi_convertor_t* pConv,
|
||||
ompi_pack_homogeneous_contig( ompi_convertor_t* pConv,
|
||||
struct iovec* iov, uint32_t* out_size,
|
||||
size_t* max_data, int32_t* freeAfter );
|
||||
OMPI_DECLSPEC int32_t
|
||||
ompi_pack_no_conv_contig_checksum( ompi_convertor_t* pConv,
|
||||
ompi_pack_homogeneous_contig_checksum( ompi_convertor_t* pConv,
|
||||
struct iovec* iov, uint32_t* out_size,
|
||||
size_t* max_data, int32_t* freeAfter );
|
||||
OMPI_DECLSPEC int32_t
|
||||
ompi_pack_no_conv_contig_with_gaps( ompi_convertor_t* pConv,
|
||||
ompi_pack_homogeneous_contig_with_gaps( ompi_convertor_t* pConv,
|
||||
struct iovec* iov, uint32_t* out_size,
|
||||
size_t* max_data, int32_t* freeAfter );
|
||||
OMPI_DECLSPEC int32_t
|
||||
ompi_pack_no_conv_contig_with_gaps_checksum( ompi_convertor_t* pConv,
|
||||
ompi_pack_homogeneous_contig_with_gaps_checksum( ompi_convertor_t* pConv,
|
||||
struct iovec* iov, uint32_t* out_size,
|
||||
size_t* max_data, int32_t* freeAfter );
|
||||
OMPI_DECLSPEC int32_t
|
||||
|
@@ -543,6 +543,17 @@ ompi_generic_simple_unpack_function( ompi_convertor_t* pConvertor,
|
||||
type = pElem->elem.common.type;
|
||||
assert (type < DT_MAX_PREDEFINED);
|
||||
required_space = ompi_ddt_basicDatatypes[type]->size;
|
||||
if( 0 != iov_len_local ) {
|
||||
/* We have some partial data here. Let's copy it into the convertor
|
||||
* and keep it hot until the next round.
|
||||
*/
|
||||
assert (type < DT_MAX_PREDEFINED);
|
||||
assert( iov_len_local < ompi_ddt_basicDatatypes[type]->size );
|
||||
MEMCPY_CSUM( pConvertor->storage.data, packed_buffer, iov_len_local, pConvertor );
|
||||
DO_DEBUG( opal_output( 0, "Saving %d bytes for the next call\n", iov_len_local ); );
|
||||
pConvertor->storage.length = iov_len_local;
|
||||
iov_len_local = 0;
|
||||
}
|
||||
goto complete_loop;
|
||||
}
|
||||
user_memory_base = pConvertor->pBaseBuf + pStack->disp;
|
||||
@@ -551,17 +562,6 @@ ompi_generic_simple_unpack_function( ompi_convertor_t* pConvertor,
|
||||
}
|
||||
}
|
||||
complete_loop:
|
||||
if( !(pConvertor->flags & CONVERTOR_COMPLETED) && (0 != iov_len_local) ) {
|
||||
/* We have some partial data here. Let's copy it into the convertor
|
||||
* and keep it hot until the next round.
|
||||
*/
|
||||
assert (type < DT_MAX_PREDEFINED);
|
||||
assert( iov_len_local < ompi_ddt_basicDatatypes[type]->size );
|
||||
MEMCPY_CSUM( pConvertor->storage.data, packed_buffer, iov_len_local, pConvertor );
|
||||
DO_DEBUG( opal_output( 0, "Saving %d bytes for the next call\n", iov_len_local ); );
|
||||
pConvertor->storage.length = iov_len_local;
|
||||
iov_len_local = 0;
|
||||
}
|
||||
iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */
|
||||
total_unpacked += iov[iov_count].iov_len;
|
||||
pConvertor->bConverted += iov[iov_count].iov_len; /* update the already converted bytes */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user