1
1

Small optimizations and some cleanups.

This commit was SVN r12400.
Этот коммит содержится в:
George Bosilca 2006-11-02 00:19:03 +00:00
родитель 994bfce7e8
Коммит ea91fd3bdb

Просмотреть файл

@ -23,6 +23,8 @@
#include <strings.h> #include <strings.h>
#endif #endif
#include "opal/prefetch.h"
#include "ompi/datatype/datatype.h" #include "ompi/datatype/datatype.h"
#include "ompi/datatype/convertor.h" #include "ompi/datatype/convertor.h"
#include "ompi/datatype/datatype_internal.h" #include "ompi/datatype/datatype_internal.h"
@ -203,7 +205,7 @@ ompi_convertor_t* ompi_convertor_create( int32_t remote_arch, int32_t mode )
#define OMPI_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( CONVERTOR, IOV, OUT, MAX_DATA ) \ #define OMPI_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( CONVERTOR, IOV, OUT, MAX_DATA ) \
do { \ do { \
/* protect against over packing data */ \ /* protect against over packing data */ \
if( (CONVERTOR)->flags & CONVERTOR_COMPLETED ) { \ if( OPAL_UNLIKELY((CONVERTOR)->flags & CONVERTOR_COMPLETED) ) { \
(IOV)[0].iov_len = 0; \ (IOV)[0].iov_len = 0; \
*(OUT) = 0; \ *(OUT) = 0; \
*(MAX_DATA) = 0; \ *(MAX_DATA) = 0; \
@ -241,7 +243,7 @@ int32_t ompi_convertor_pack( ompi_convertor_t* pConv,
if( (*max_data) < pending_length ) if( (*max_data) < pending_length )
pending_length = (*max_data); pending_length = (*max_data);
for( i = 0; (i < *out_size) && (0 != pending_length); i++ ) { for( i = 0; i < *out_size; i++ ) {
base_pointer = pConv->pBaseBuf + pConv->bConverted + pConv->pDesc->true_lb; base_pointer = pConv->pBaseBuf + pConv->bConverted + pConv->pDesc->true_lb;
if( iov[i].iov_len > pending_length ) if( iov[i].iov_len > pending_length )
@ -253,6 +255,7 @@ int32_t ompi_convertor_pack( ompi_convertor_t* pConv,
MEMCPY( iov[i].iov_base, base_pointer, iov[i].iov_len ); MEMCPY( iov[i].iov_base, base_pointer, iov[i].iov_len );
} }
pConv->bConverted += iov[i].iov_len; pConv->bConverted += iov[i].iov_len;
if( pending_length == iov[i].iov_len ) break;
pending_length -= iov[i].iov_len; pending_length -= iov[i].iov_len;
} }
*out_size = i; *out_size = i;
@ -366,6 +369,11 @@ int ompi_convertor_create_stack_at_begining( ompi_convertor_t* convertor,
dt_stack_t* pStack = convertor->pStack; dt_stack_t* pStack = convertor->pStack;
dt_elem_desc_t* pElems; dt_elem_desc_t* pElems;
/* The prepare function already make the selection on which data representation
* we have to use: normal one or the optimized version ?
*/
pElems = convertor->use_desc->desc;
convertor->stack_pos = 1; convertor->stack_pos = 1;
convertor->partial_length = 0; convertor->partial_length = 0;
convertor->bConverted = 0; convertor->bConverted = 0;
@ -376,10 +384,6 @@ int ompi_convertor_create_stack_at_begining( ompi_convertor_t* convertor,
pStack[0].index = -1; pStack[0].index = -1;
pStack[0].count = convertor->count; pStack[0].count = convertor->count;
pStack[0].disp = 0; pStack[0].disp = 0;
/* The prepare function already make the selection on which data representation
* we have to use: normal one or the optimized version ?
*/
pElems = convertor->use_desc->desc;
pStack[1].index = 0; pStack[1].index = 0;
pStack[1].disp = 0; pStack[1].disp = 0;
@ -423,9 +427,10 @@ int32_t ompi_convertor_set_position_nocheck( ompi_convertor_t* convertor,
{ \ { \
uint64_t bdt_mask; \ uint64_t bdt_mask; \
\ \
bdt_mask = datatype->bdt_used & convertor->master->hetero_mask; \ /* Compute the local in advance */ \
/* Compute the local and remote sizes */ \
convertor->local_size = convertor->count * datatype->size; \ convertor->local_size = convertor->count * datatype->size; \
bdt_mask = datatype->bdt_used & convertor->master->hetero_mask; \
\
convertor->pBaseBuf = (char*)pUserBuf; \ convertor->pBaseBuf = (char*)pUserBuf; \
convertor->count = count; \ convertor->count = count; \
/* Grab the datatype part of the flags */ \ /* Grab the datatype part of the flags */ \