1
1

A lot of improvements on the datatype. We still use the general version (BEWARE: not suitable for performance ...)

- use the stack in a standard way across functions.
- correctly compute the stack if we provide an offset
- remove all u_int* references and replace them with uint*
- other small cleanups

This commit was SVN r3403.
Этот коммит содержится в:
George Bosilca 2004-10-28 19:45:19 +00:00
родитель d8294afea9
Коммит 44bcf51be6
7 изменённых файлов: 250 добавлений и 229 удалений

Просмотреть файл

@ -55,16 +55,16 @@ OMPI_DECLSPEC extern ompi_pointer_array_t *ompi_datatype_f_to_c_table;
* by a set of basic elements. * by a set of basic elements.
*/ */
typedef struct __dt_elem_desc { typedef struct __dt_elem_desc {
u_int16_t flags; /**< flags for the record */ uint16_t flags; /**< flags for the record */
u_int16_t type; /**< the basic data type id */ uint16_t type; /**< the basic data type id */
u_int32_t count; /**< number of elements */ uint32_t count; /**< number of elements */
long disp; /**< displacement of the first element */ long disp; /**< displacement of the first element */
u_int32_t extent; /**< extent of each element */ uint32_t extent; /**< extent of each element */
} dt_elem_desc_t; } dt_elem_desc_t;
typedef struct __dt_struct_desc { typedef struct __dt_struct_desc {
u_int32_t length; /* the maximum number of elements in the description array */ uint32_t length; /* the maximum number of elements in the description array */
u_int32_t used; /* the number of used elements in the description array */ uint32_t used; /* the number of used elements in the description array */
dt_elem_desc_t* desc; dt_elem_desc_t* desc;
} dt_type_desc_t; } dt_type_desc_t;
@ -74,15 +74,15 @@ typedef struct ompi_datatype_t {
ompi_object_t super; /**< basic superclass */ ompi_object_t super; /**< basic superclass */
unsigned long size; /**< total size in bytes of the memory used by the data if unsigned long size; /**< total size in bytes of the memory used by the data if
* the data is put on a contiguous buffer */ * the data is put on a contiguous buffer */
u_int32_t align; /**< data should be aligned to */ uint32_t align; /**< data should be aligned to */
long true_lb; long true_lb;
long true_ub; /**< the true ub of the data without user defined lb and ub */ long true_ub; /**< the true ub of the data without user defined lb and ub */
long lb; /**< lower bound in memory */ long lb; /**< lower bound in memory */
long ub; /**< upper bound in memory */ long ub; /**< upper bound in memory */
u_int16_t flags; /**< the flags */ uint16_t flags; /**< the flags */
u_int16_t id; /**< data id, normally the index in the data array. */ uint16_t id; /**< data id, normally the index in the data array. */
u_int32_t nbElems; /**< total number of elements inside the datatype */ uint32_t nbElems; /**< total number of elements inside the datatype */
u_int64_t bdt_used; /**< which basic datatypes are used in the data description */ uint64_t bdt_used; /**< which basic datatypes are used in the data description */
/* Attribute fields */ /* Attribute fields */
ompi_hash_table_t *d_keyhash; ompi_hash_table_t *d_keyhash;
@ -95,7 +95,7 @@ typedef struct ompi_datatype_t {
/* basic elements count used to compute the size of the datatype for /* basic elements count used to compute the size of the datatype for
* remote nodes */ * remote nodes */
u_int32_t btypes[DT_MAX_PREDEFINED]; uint32_t btypes[DT_MAX_PREDEFINED];
} dt_desc_t, ompi_datatype_t; } dt_desc_t, ompi_datatype_t;
OBJ_CLASS_DECLARATION( ompi_datatype_t ); OBJ_CLASS_DECLARATION( ompi_datatype_t );
@ -116,23 +116,23 @@ void ompi_ddt_dump( dt_desc_t* pData );
OMPI_DECLSPEC int ompi_ddt_duplicate( dt_desc_t* oldType, dt_desc_t** newType ); OMPI_DECLSPEC int ompi_ddt_duplicate( dt_desc_t* oldType, dt_desc_t** newType );
OMPI_DECLSPEC int ompi_ddt_create_contiguous( int count, dt_desc_t* oldType, dt_desc_t** newType ); OMPI_DECLSPEC int ompi_ddt_create_contiguous( int count, dt_desc_t* oldType, dt_desc_t** newType );
OMPI_DECLSPEC int ompi_ddt_create_vector( int count, int bLength, long stride, OMPI_DECLSPEC int ompi_ddt_create_vector( int count, int bLength, long stride,
dt_desc_t* oldType, dt_desc_t** newType ); dt_desc_t* oldType, dt_desc_t** newType );
OMPI_DECLSPEC int ompi_ddt_create_hvector( int count, int bLength, long stride, OMPI_DECLSPEC int ompi_ddt_create_hvector( int count, int bLength, long stride,
dt_desc_t* oldType, dt_desc_t** newType ); dt_desc_t* oldType, dt_desc_t** newType );
OMPI_DECLSPEC int ompi_ddt_create_indexed( int count, int* pBlockLength, int* pDisp, OMPI_DECLSPEC int ompi_ddt_create_indexed( int count, int* pBlockLength, int* pDisp,
dt_desc_t* oldType, dt_desc_t** newType ); dt_desc_t* oldType, dt_desc_t** newType );
OMPI_DECLSPEC int ompi_ddt_create_hindexed( int count, int* pBlockLength, long* pDisp, OMPI_DECLSPEC int ompi_ddt_create_hindexed( int count, int* pBlockLength, long* pDisp,
dt_desc_t* oldType, dt_desc_t** newType ); dt_desc_t* oldType, dt_desc_t** newType );
OMPI_DECLSPEC int ompi_ddt_create_indexed_block( int count, int bLength, int* pDisp, OMPI_DECLSPEC int ompi_ddt_create_indexed_block( int count, int bLength, int* pDisp,
dt_desc_t* oldType, dt_desc_t** newType ); dt_desc_t* oldType, dt_desc_t** newType );
OMPI_DECLSPEC int ompi_ddt_create_struct( int count, int* pBlockLength, long* pDisp, OMPI_DECLSPEC int ompi_ddt_create_struct( int count, int* pBlockLength, long* pDisp,
dt_desc_t** pTypes, dt_desc_t** newType ); dt_desc_t** pTypes, dt_desc_t** newType );
OMPI_DECLSPEC int ompi_ddt_create_resized( dt_desc_t* oldType, long lb, long extent, dt_desc_t** newType ); OMPI_DECLSPEC int ompi_ddt_create_resized( dt_desc_t* oldType, long lb, long extent, dt_desc_t** newType );
OMPI_DECLSPEC int ompi_ddt_create_subarray( int ndims, int* pSizes, int* pSubSizes, int* pStarts, OMPI_DECLSPEC int ompi_ddt_create_subarray( int ndims, int* pSizes, int* pSubSizes, int* pStarts,
int order, dt_desc_t* oldType, dt_desc_t** newType ); int order, dt_desc_t* oldType, dt_desc_t** newType );
OMPI_DECLSPEC int ompi_ddt_create_darray( int size, int rank, int ndims, int* pGSizes, int *pDistrib, OMPI_DECLSPEC int ompi_ddt_create_darray( int size, int rank, int ndims, int* pGSizes, int *pDistrib,
int* pDArgs, int* pPSizes, int order, dt_desc_t* oldType, int* pDArgs, int* pPSizes, int order, dt_desc_t* oldType,
dt_desc_t** newType ); dt_desc_t** newType );
OMPI_DECLSPEC int ompi_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtNew, unsigned int count, long disp, long extent ); OMPI_DECLSPEC int ompi_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtNew, unsigned int count, long disp, long extent );
@ -158,30 +158,30 @@ OMPI_DECLSPEC int ompi_ddt_optimize_short( dt_desc_t* pData, int count, dt_type_
/* flags for the datatypes */ /* flags for the datatypes */
typedef int (*conversion_fct_t)( unsigned int count, typedef int (*conversion_fct_t)( unsigned int count,
void* from, unsigned int from_len, long from_extent, void* from, uint32_t from_len, long from_extent,
void* to, unsigned int in_length, long to_extent ); void* to, uint32_t in_length, long to_extent );
typedef struct __dt_stack dt_stack_t; typedef struct __dt_stack dt_stack_t;
typedef struct ompi_convertor_t ompi_convertor_t; typedef struct ompi_convertor_t ompi_convertor_t;
typedef int (*convertor_advance_fct_t)( ompi_convertor_t* pConvertor, typedef int (*convertor_advance_fct_t)( ompi_convertor_t* pConvertor,
struct iovec* pInputv, struct iovec* pInputv,
unsigned int* inputCount, uint32_t* inputCount,
unsigned int* max_data, uint32_t* max_data,
int* freeAfter ); int32_t* freeAfter );
typedef void*(*memalloc_fct_t)( size_t* pLength ); typedef void*(*memalloc_fct_t)( size_t* pLength );
struct ompi_convertor_t { struct ompi_convertor_t {
ompi_object_t super; /**< basic superclass */ ompi_object_t super; /**< basic superclass */
dt_desc_t* pDesc; /**< the datatype description associated with the convertor */ dt_desc_t* pDesc; /**< the datatype description associated with the convertor */
u_int32_t remoteArch; /**< the remote architecture */ uint32_t remoteArch; /**< the remote architecture */
dt_stack_t* pStack; /**< the local stack for the actual conversion */ dt_stack_t* pStack; /**< the local stack for the actual conversion */
u_int32_t converted; /**< the number of already converted elements */ uint32_t converted; /**< the number of already converted elements */
u_int32_t bConverted; /**< the size of already converted elements in bytes */ uint32_t bConverted; /**< the size of already converted elements in bytes */
u_int32_t flags; /**< the properties of this convertor */ uint32_t flags; /**< the properties of this convertor */
u_int32_t count; /**< the total number of full datatype elements */ uint32_t count; /**< the total number of full datatype elements */
u_int32_t stack_pos; /**< the actual position on the stack */ uint32_t stack_pos; /**< the actual position on the stack */
char* pBaseBuf; /**< initial buffer as supplied by the user */ char* pBaseBuf; /**< initial buffer as supplied by the user */
u_int32_t available_space; /**< total available space */ uint32_t available_space; /**< total available space */
convertor_advance_fct_t fAdvance; /**< pointer to the pack/unpack functions */ convertor_advance_fct_t fAdvance; /**< pointer to the pack/unpack functions */
memalloc_fct_t memAlloc_fn; /**< pointer to the memory allocation function */ memalloc_fct_t memAlloc_fn; /**< pointer to the memory allocation function */
conversion_fct_t* pFunctions; /**< the convertor functions pointer */ conversion_fct_t* pFunctions; /**< the convertor functions pointer */

Просмотреть файл

@ -105,19 +105,19 @@ struct __dt_stack {
* for the name of the fields. * for the name of the fields.
*/ */
typedef struct __dt_loop_desc { typedef struct __dt_loop_desc {
u_int16_t flags; /**< flags for the record */ uint16_t flags; /**< flags for the record */
u_int16_t type; /**< the basic data type id */ uint16_t type; /**< the basic data type id */
u_int32_t loops; /**< number of times the loop have to be done */ uint32_t loops; /**< number of times the loop have to be done */
long items; /**< number of items in the loop */ long items; /**< number of items in the loop */
u_int32_t extent; /**< extent of the whole loop */ uint32_t extent; /**< extent of the whole loop */
} dt_loop_desc_t; } dt_loop_desc_t;
typedef struct __dt_endloop_desc { typedef struct __dt_endloop_desc {
u_int16_t flags; /**< flags for the record */ uint16_t flags; /**< flags for the record */
u_int16_t type; /**< the basic data type id */ uint16_t type; /**< the basic data type id */
u_int32_t items; /**< number of items in the loop */ uint32_t items; /**< number of items in the loop */
long total_extent; /**< total extent of the loop taking in account the repetitions */ long total_extent; /**< total extent of the loop taking in account the repetitions */
u_int32_t size; /**< real size of the data in the loop */ uint32_t size; /**< real size of the data in the loop */
} dt_endloop_desc_t; } dt_endloop_desc_t;
/* keep the last 16 bits free for data flags */ /* keep the last 16 bits free for data flags */

Просмотреть файл

@ -21,7 +21,7 @@
int ompi_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd, int ompi_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd,
unsigned int count, long disp, long extent ) unsigned int count, long disp, long extent )
{ {
u_int32_t newLength, place_needed = 0, i; uint32_t newLength, place_needed = 0, i;
short localFlags = 0; /* no specific options yet */ short localFlags = 0; /* no specific options yet */
dt_elem_desc_t *pLast, *pLoop = NULL; dt_elem_desc_t *pLast, *pLoop = NULL;
long lb, ub; long lb, ub;

Просмотреть файл

@ -208,7 +208,7 @@ int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
free( (PDST)->opt_desc.desc ); \ free( (PDST)->opt_desc.desc ); \
(PDST)->opt_desc = (PSRC)->opt_desc; \ (PDST)->opt_desc = (PSRC)->opt_desc; \
memcpy( (PDST)->btypes, (PSRC)->btypes, \ memcpy( (PDST)->btypes, (PSRC)->btypes, \
DT_MAX_PREDEFINED * sizeof(u_int32_t) ); \ DT_MAX_PREDEFINED * sizeof(uint32_t) ); \
} while(0) } while(0)
#define DECLARE_MPI2_COMPOSED_STRUCT_DDT( PDATA, MPIDDT, MPIDDTNAME, type1, type2, MPIType1, MPIType2 ) \ #define DECLARE_MPI2_COMPOSED_STRUCT_DDT( PDATA, MPIDDT, MPIDDTNAME, type1, type2, MPIType1, MPIType2 ) \

Просмотреть файл

@ -12,15 +12,15 @@
static static
int ompi_convertor_pack_general( ompi_convertor_t* pConvertor, int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
struct iovec* iov, unsigned int* out_size, struct iovec* iov, uint32_t* out_size,
unsigned int* max_data, uint32_t* max_data,
int* freeAfter ) int32_t* freeAfter )
{ {
dt_stack_t* pStack; /* pointer to the position on the stack */ dt_stack_t* pStack; /* pointer to the position on the stack */
unsigned int pos_desc; /* actual position in the description of the derived datatype */ uint32_t pos_desc; /* actual position in the description of the derived datatype */
int count_desc; /* the number of items already done in the actual pos_desc */ int count_desc; /* the number of items already done in the actual pos_desc */
int type; /* type at current position */ int type; /* type at current position */
unsigned int advance; /* number of bytes that we should advance the buffer */ uint32_t advance; /* number of bytes that we should advance the buffer */
long disp_desc = 0; /* compute displacement for truncated data */ long disp_desc = 0; /* compute displacement for truncated data */
int bConverted = 0; /* number of bytes converted this time */ int bConverted = 0; /* number of bytes converted this time */
dt_desc_t *pData = pConvertor->pDesc; dt_desc_t *pData = pConvertor->pDesc;
@ -29,7 +29,7 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
int oCount = (pData->ub - pData->lb) * pConvertor->count; int oCount = (pData->ub - pData->lb) * pConvertor->count;
char* pInput; char* pInput;
int iCount, rc; int iCount, rc;
unsigned int iov_count, total_bytes_converted = 0; uint32_t iov_count, total_bytes_converted = 0;
DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", (void*)pConvertor, DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", (void*)pConvertor,
iov[0].iov_base, iov[0].iov_len, *out_size ); iov[0].iov_base, iov[0].iov_len, *out_size );
@ -51,7 +51,7 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) { for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
bConverted = 0; bConverted = 0;
if( iov[iov_count].iov_base == NULL ) { if( iov[iov_count].iov_base == NULL ) {
unsigned int length = iov[iov_count].iov_len; uint32_t length = iov[iov_count].iov_len;
if( length <= 0 ) if( length <= 0 )
length = pConvertor->count * pData->size - pConvertor->bConverted - bConverted; length = pConvertor->count * pData->size - pConvertor->bConverted - bConverted;
if( (*max_data) < length ) if( (*max_data) < length )
@ -140,17 +140,17 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
static static
int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv, int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
struct iovec* iov, struct iovec* iov,
unsigned int* out_size, uint32_t* out_size,
unsigned int* max_data, uint32_t* max_data,
int* freeAfter ) int* freeAfter )
{ {
dt_stack_t* pStack; /* pointer to the position on the stack */ dt_stack_t* pStack; /* pointer to the position on the stack */
u_int32_t pos_desc; /* actual position in the description of the derived datatype */ uint32_t pos_desc; /* actual position in the description of the derived datatype */
int type; /* type at current position */ int type; /* type at current position */
int i; /* index for basic elements with extent */ int i; /* index for basic elements with extent */
int bConverted = 0; /* number of bytes converted/moved this time */ int bConverted = 0; /* number of bytes converted/moved this time */
long lastDisp = 0, last_count = 0; long lastDisp = 0, last_count = 0;
u_int32_t space = iov[0].iov_len, last_blength = 0; uint32_t space = iov[0].iov_len, last_blength = 0;
char* pDestBuf; char* pDestBuf;
dt_desc_t* pData = pConv->pDesc; dt_desc_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems; dt_elem_desc_t* pElems;
@ -306,20 +306,20 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
* return the pointer to the contiguous piece of memory to the upper level. * return the pointer to the contiguous piece of memory to the upper level.
*/ */
static static
int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv, int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
struct iovec* iov, struct iovec* iov,
unsigned int *out_size, uint32_t *out_size,
unsigned int* max_data, uint32_t* max_data,
int* freeAfter ) int* freeAfter )
{ {
dt_stack_t* pStack; /* pointer to the position on the stack */ dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */ int pos_desc; /* actual position in the description of the derived datatype */
int i; /* index for basic elements with extent */ int i; /* index for basic elements with extent */
u_int32_t iov_pos = 0; /* index in the iovec that we put data inside */ uint32_t iov_pos = 0; /* index in the iovec that we put data inside */
int bConverted = 0; /* number of bytes converted/moved this time */ int bConverted = 0; /* number of bytes converted/moved this time */
u_int32_t space_on_iovec; /* amount of free space on the current iovec */ uint32_t space_on_iovec; /* amount of free space on the current iovec */
long lastDisp = 0, last_count = 0; long lastDisp = 0, last_count = 0;
u_int32_t space = *max_data, last_blength = 0, saveLength; uint32_t space = *max_data, last_blength = 0, saveLength;
char *pDestBuf, *savePos; char *pDestBuf, *savePos;
dt_desc_t* pData = pConv->pDesc; dt_desc_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems; dt_elem_desc_t* pElems;
@ -571,12 +571,15 @@ int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
return (pConv->bConverted == (pData->size * pConv->count)); return (pConv->bConverted == (pData->size * pConv->count));
} }
/* the Contig versions does not use the stack. They can easily retrieve
* the status with just the informations from pConvertor->bConverted.
*/
static static
int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv, int ompi_convertor_pack_no_conversion_contig( ompi_convertor_t* pConv,
struct iovec* iov, struct iovec* iov,
unsigned int* out_size, uint32_t* out_size,
unsigned int* max_data, uint32_t* max_data,
int* freeAfter ) int* freeAfter )
{ {
dt_desc_t* pData = pConv->pDesc; dt_desc_t* pData = pConv->pDesc;
char* pSrc = pConv->pBaseBuf + pData->true_lb; char* pSrc = pConv->pBaseBuf + pData->true_lb;
@ -584,8 +587,9 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
char* pDest; char* pDest;
size_t length = pData->size * pConv->count; size_t length = pData->size * pConv->count;
long extent; long extent;
u_int32_t max_allowed = *max_data; uint32_t max_allowed = *max_data;
u_int32_t i, index; uint32_t i, index;
uint32_t iov_count, total_bytes_converted = 0;
i = pConv->bConverted / pData->size; /* how many we already pack */ i = pConv->bConverted / pData->size; /* how many we already pack */
extent = pData->ub - pData->lb; extent = pData->ub - pData->lb;
@ -595,99 +599,102 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
/* There are some optimizations that can be done if the upper level /* There are some optimizations that can be done if the upper level
* does not provide a buffer. * does not provide a buffer.
*/ */
if( iov[0].iov_base == NULL ) { for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
/* special case for small data. We avoid allocating memory if we if( iov[iov_count].iov_base == NULL ) {
* can fill the iovec directly with the address of the remaining /* special case for small data. We avoid allocating memory if we
* data. * can fill the iovec directly with the address of the remaining
*/ * data.
if( (pConv->count - i) < (*out_size) ) { */
for( index = 0; i < pConv->count; i++, index++ ) { if( (pConv->count - i) < ((*out_size) - iov_count) ) {
iov[index].iov_base = pSrc; for( index = iov_count; i < pConv->count; i++, index++ ) {
iov[index].iov_len = pData->size; iov[index].iov_base = pSrc;
pSrc += extent; iov[index].iov_len = pData->size;
pConv->bConverted += pData->size; pSrc += extent;
} pConv->bConverted += pData->size;
*out_size = index; }
*max_data = index * pData->size; *out_size = iov_count + index;
return 1; /* we're done */ *max_data = total_bytes_converted + index * pData->size;
} return 1; /* we're done */
/* now special case for big contiguous data with gaps around */ }
if( pData->size >= IOVEC_MEM_LIMIT ) { /* now special case for big contiguous data with gaps around */
/* as we dont have to copy any data, we can simply fill the iovecs if( pData->size >= IOVEC_MEM_LIMIT ) {
* with data from the user data description. /* as we dont have to copy any data, we can simply fill the iovecs
*/ * with data from the user data description.
for( index = 0; (i < pConv->count) && (index < (*out_size)); */
i++, index++ ) { for( index = iov_count; (i < pConv->count) && (index < (*out_size));
if( max_allowed < pData->size ) { i++, index++ ) {
iov[index].iov_base = pSrc; if( max_allowed < pData->size ) {
iov[index].iov_len = max_allowed; iov[index].iov_base = pSrc;
max_allowed = 0; iov[index].iov_len = max_allowed;
printf( "%s:%d Possible problem here\n", __FILE__, __LINE__ ); max_allowed = 0;
break; printf( "%s:%d Possible problem here\n", __FILE__, __LINE__ );
} else { break;
iov[index].iov_base = pSrc; } else {
iov[index].iov_len = pData->size; iov[index].iov_base = pSrc;
pSrc += extent; iov[index].iov_len = pData->size;
} pSrc += extent;
max_allowed -= iov[index].iov_len; }
} max_allowed -= iov[index].iov_len;
*out_size = index; }
*max_data = (*max_data) - max_allowed; *out_size = index;
pConv->bConverted += (*max_data); *max_data = total_bytes_converted + - max_allowed;
return (pConv->bConverted == length ); pConv->bConverted += total_bytes_converted;
} return (pConv->bConverted == length );
}
}
if( (long)pData->size == extent ) { /* that really contiguous */
if( iov[iov_count].iov_base == NULL ) {
iov[iov_count].iov_base = pSrc; /* + pConv->bConverted; */
if( (pConv->bConverted + iov[iov_count].iov_len) > length )
iov[iov_count].iov_len = length - pConv->bConverted;
} else {
/* contiguous data just memcpy the smallest data in the user buffer */
iov[iov_count].iov_len = IMIN( iov[iov_count].iov_len, length );
OMPI_DDT_SAFEGUARD_POINTER( pSrc, iov[iov_count].iov_len,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( iov[iov_count].iov_base, pSrc, iov[iov_count].iov_len);
}
*max_data = iov[iov_count].iov_len;
} else {
uint32_t done, counter;
if( iov[iov_count].iov_base == NULL ) {
iov[iov_count].iov_base = pConv->memAlloc_fn( &(iov[iov_count].iov_len) );
(*freeAfter) |= (1 << 0);
if( max_allowed < iov[iov_count].iov_len )
iov[iov_count].iov_len = max_allowed;
else
max_allowed = iov[iov_count].iov_len;
}
pDest = iov[iov_count].iov_base;
done = pConv->bConverted - i * pData->size; /* how much data left last time */
pSrc += done;
if( done != 0 ) { /* still some data to copy from the last time */
done = pData->size - done;
OMPI_DDT_SAFEGUARD_POINTER( pSrc, done, pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDest, pSrc, done );
pDest += done;
max_allowed -= done;
i++; /* just to compute the correct source pointer */
}
pSrc = pConv->pBaseBuf + pData->true_lb + i * extent;
counter = max_allowed / pData->size;
if( counter > pConv->count ) counter = pConv->count;
for( i = 0; i < counter; i++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pSrc, pData->size, pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDest, pSrc, pData->size );
pDest += pData->size;
pSrc += extent;
}
max_allowed -= (counter * pData->size);
total_bytes_converted += iov[iov_count].iov_len - max_allowed;
iov[iov_count].iov_len = *max_data;
}
} }
*max_data = total_bytes_converted;
if( (long)pData->size == extent ) { /* that really contiguous */ pConv->bConverted += iov[iov_count].iov_len;
if( iov[0].iov_base == NULL ) { *out_size = iov_count;
iov[0].iov_base = pSrc; /* + pConv->bConverted; */
if( (pConv->bConverted + iov[0].iov_len) > length )
iov[0].iov_len = length - pConv->bConverted;
} else {
/* contiguous data just memcpy the smallest data in the user buffer */
iov[0].iov_len = IMIN( iov[0].iov_len, length );
OMPI_DDT_SAFEGUARD_POINTER( pSrc, iov[0].iov_len,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( iov[0].iov_base, pSrc, iov[0].iov_len);
}
*max_data = iov[0].iov_len;
} else {
u_int32_t done, counter;
if( iov[0].iov_base == NULL ) {
iov[0].iov_base = pConv->memAlloc_fn( &(iov[0].iov_len) );
(*freeAfter) |= (1 << 0);
if( max_allowed < iov[0].iov_len )
iov[0].iov_len = max_allowed;
else
max_allowed = iov[0].iov_len;
}
pDest = iov[0].iov_base;
done = pConv->bConverted - i * pData->size; /* how much data left last time */
pSrc += done;
if( done != 0 ) { /* still some data to copy from the last time */
done = pData->size - done;
OMPI_DDT_SAFEGUARD_POINTER( pSrc, done, pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDest, pSrc, done );
pDest += done;
max_allowed -= done;
i++; /* just to compute the correct source pointer */
}
pSrc = pConv->pBaseBuf + pData->true_lb + i * extent;
counter = max_allowed / pData->size;
if( counter > pConv->count ) counter = pConv->count;
for( i = 0; i < counter; i++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pSrc, pData->size, pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDest, pSrc, pData->size );
pDest += pData->size;
pSrc += extent;
}
max_allowed -= (counter * pData->size);
*max_data = iov[0].iov_len - max_allowed;
iov[0].iov_len = *max_data;
}
pConv->bConverted += iov[0].iov_len;
*out_size = 1;
return (pConv->bConverted == length); return (pConv->bConverted == length);
} }
@ -716,12 +723,12 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
*/ */
int ompi_convertor_pack( ompi_convertor_t* pConv, int ompi_convertor_pack( ompi_convertor_t* pConv,
struct iovec* iov, struct iovec* iov,
unsigned int* out_size, uint32_t* out_size,
unsigned int* max_data, uint32_t* max_data,
int* freeAfter ) int* freeAfter )
{ {
dt_desc_t* pData = pConv->pDesc; dt_desc_t* pData = pConv->pDesc;
u_int32_t done = 0, index = 0; uint32_t done = 0, index = 0;
*freeAfter = 0; /* nothing to free yet */ *freeAfter = 0; /* nothing to free yet */
/* TODO should use the remote size */ /* TODO should use the remote size */
@ -762,7 +769,7 @@ int ompi_convertor_pack( ompi_convertor_t* pConv,
extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED]; extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
int ompi_convertor_init_for_send( ompi_convertor_t* pConv, int ompi_convertor_init_for_send( ompi_convertor_t* pConv,
unsigned int flags, uint32_t flags,
dt_desc_t* dt, dt_desc_t* dt,
int count, int count,
void* pUserBuf, void* pUserBuf,
@ -792,14 +799,11 @@ int ompi_convertor_init_for_send( ompi_convertor_t* pConv,
pConv->memAlloc_fn = allocfn; pConv->memAlloc_fn = allocfn;
if( dt->flags & DT_FLAG_CONTIGUOUS ) { if( dt->flags & DT_FLAG_CONTIGUOUS ) {
pConv->flags |= DT_FLAG_CONTIGUOUS | CONVERTOR_HOMOGENEOUS; pConv->flags |= DT_FLAG_CONTIGUOUS | CONVERTOR_HOMOGENEOUS;
pConv->fAdvance = ompi_convertor_pack_homogeneous_contig; pConv->fAdvance = ompi_convertor_pack_no_conversion_contig;
} else { } else {
/* TODO handle the sender convert case */ /* TODO handle the sender convert case */
pConv->fAdvance = ompi_convertor_pack_homogeneous_with_memcpy; pConv->fAdvance = ompi_convertor_pack_no_conversion_contig;
pConv->fAdvance = ompi_convertor_pack_homogeneous; pConv->fAdvance = ompi_convertor_pack_no_conversion;
#if defined(ONE_STEP)
pConv->fAdvance = ompi_convertor_pack_homogeneous_with_memcpy;
#endif /* ONE_STEP */
} }
pConv->fAdvance = ompi_convertor_pack_general; pConv->fAdvance = ompi_convertor_pack_general;
if( starting_pos != 0 ) { if( starting_pos != 0 ) {
@ -863,7 +867,7 @@ ompi_convertor_t* ompi_convertor_get_copy( ompi_convertor_t* pConvertor )
} }
/* Actually we suppose that we can only do receiver side conversion */ /* Actually we suppose that we can only do receiver side conversion */
int ompi_convertor_get_packed_size( ompi_convertor_t* pConv, unsigned int* pSize ) int ompi_convertor_get_packed_size( ompi_convertor_t* pConv, uint32_t* pSize )
{ {
int ddt_size = 0; int ddt_size = 0;
@ -874,7 +878,7 @@ int ompi_convertor_get_packed_size( ompi_convertor_t* pConv, unsigned int* pSize
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
int ompi_convertor_get_unpacked_size( ompi_convertor_t* pConv, unsigned int* pSize ) int ompi_convertor_get_unpacked_size( ompi_convertor_t* pConv, uint32_t* pSize )
{ {
int i; int i;
dt_desc_t* pData = pConv->pDesc; dt_desc_t* pData = pConv->pDesc;

Просмотреть файл

@ -40,22 +40,22 @@ void ompi_ddt_dump_stack( dt_stack_t* pStack, int stack_pos, dt_elem_desc_t* pDe
*/ */
static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor, static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
struct iovec* iov, struct iovec* iov,
unsigned int* out_size, uint32_t* out_size,
unsigned int* max_data, uint32_t* max_data,
int* freeAfter ) int32_t* freeAfter )
{ {
dt_stack_t* pStack; /* pointer to the position on the stack */ dt_stack_t* pStack; /* pointer to the position on the stack */
unsigned int pos_desc; /* actual position in the description of the derived datatype */ uint32_t pos_desc; /* actual position in the description of the derived datatype */
int count_desc; /* the number of items already done in the actual pos_desc */ int count_desc; /* the number of items already done in the actual pos_desc */
int type; /* type at current position */ int type; /* type at current position */
unsigned int advance; /* number of bytes that we should advance the buffer */ uint32_t advance; /* number of bytes that we should advance the buffer */
long disp_desc = 0; /* compute displacement for truncated data */ long disp_desc = 0; /* compute displacement for truncated data */
int bConverted = 0; /* number of bytes converted this time */ int bConverted = 0; /* number of bytes converted this time */
dt_elem_desc_t* pElems; dt_elem_desc_t* pElems;
int oCount = (pConvertor->pDesc->ub - pConvertor->pDesc->lb) * pConvertor->count; int oCount = (pConvertor->pDesc->ub - pConvertor->pDesc->lb) * pConvertor->count;
char* pInput; char* pInput;
int iCount, rc; int iCount, rc;
unsigned int iov_count, total_bytes_converted = 0; uint32_t iov_count, total_bytes_converted = 0;
/* For the general case always use the user data description */ /* For the general case always use the user data description */
pElems = pConvertor->pDesc->desc.desc; pElems = pConvertor->pDesc->desc.desc;
@ -152,13 +152,13 @@ static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv, static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
struct iovec* iov, struct iovec* iov,
unsigned int* out_size, uint32_t* out_size,
unsigned int* max_data, uint32_t* max_data,
int* freeAfter ) int32_t* freeAfter )
{ {
dt_stack_t* pStack; /* pointer to the position on the stack */ dt_stack_t* pStack; /* pointer to the position on the stack */
unsigned int pos_desc; /* actual position in the description of the derived datatype */ uint32_t pos_desc; /* actual position in the description of the derived datatype */
unsigned int i; /* counter for basic datatype with extent */ uint32_t i; /* counter for basic datatype with extent */
int bConverted = 0; /* number of bytes converted this time */ int bConverted = 0; /* number of bytes converted this time */
long lastDisp = 0; long lastDisp = 0;
size_t space = iov[0].iov_len, last_count = 0, last_blength = 0; size_t space = iov[0].iov_len, last_count = 0, last_blength = 0;
@ -266,7 +266,7 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
bConverted += last_count; bConverted += last_count;
lastDisp += last_count; lastDisp += last_count;
} }
if( pos_desc < (unsigned int)pStack->end_loop ) { /* cleanup the stack */ if( pos_desc < (uint32_t)pStack->end_loop ) { /* cleanup the stack */
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_blength, PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_blength,
lastDisp, pos_desc ); lastDisp, pos_desc );
} }
@ -279,16 +279,16 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv, static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
struct iovec* iov, struct iovec* iov,
unsigned int* out_size, uint32_t* out_size,
unsigned int* max_data, uint32_t* max_data,
int* freeAfter ) int32_t* freeAfter )
{ {
dt_desc_t *pData = pConv->pDesc; dt_desc_t *pData = pConv->pDesc;
char* pDstBuf = pConv->pBaseBuf; char* pDstBuf = pConv->pBaseBuf;
char* pSrcBuf = iov[0].iov_base; char* pSrcBuf = iov[0].iov_base;
int bConverted = 0; int bConverted = 0;
long extent = pData->ub - pData->lb; long extent = pData->ub - pData->lb;
unsigned int length, remaining, i; uint32_t length, remaining, i;
dt_stack_t* stack = &(pConv->pStack[1]); dt_stack_t* stack = &(pConv->pStack[1]);
*out_size = 1; *out_size = 1;
@ -344,12 +344,12 @@ static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
int ompi_convertor_unpack( ompi_convertor_t* pConvertor, int ompi_convertor_unpack( ompi_convertor_t* pConvertor,
struct iovec* iov, struct iovec* iov,
unsigned int* out_size, uint32_t* out_size,
unsigned int* max_data, uint32_t* max_data,
int* freeAfter ) int32_t* freeAfter )
{ {
dt_desc_t *pData = pConvertor->pDesc; dt_desc_t *pData = pConvertor->pDesc;
unsigned int length; uint32_t length;
*freeAfter = 0; *freeAfter = 0;
if( pConvertor->bConverted == (pData->size * pConvertor->count) ) { if( pConvertor->bConverted == (pData->size * pConvertor->count) ) {
@ -383,13 +383,13 @@ int ompi_convertor_unpack( ompi_convertor_t* pConvertor,
* basic datatype. * basic datatype.
*/ */
#define COPY_TYPE( TYPENAME, TYPE, COUNT ) \ #define COPY_TYPE( TYPENAME, TYPE, COUNT ) \
static int copy_##TYPENAME( unsigned int count, \ static int copy_##TYPENAME( uint32_t count, \
char* from, unsigned int from_len, long from_extent, \ char* from, uint32_t from_len, long from_extent, \
char* to, unsigned int to_len, long to_extent ) \ char* to, uint32_t to_len, long to_extent ) \
{ \ { \
unsigned int i; \ uint32_t i; \
unsigned int remote_TYPE_size = sizeof(TYPE) * (COUNT); /* TODO */ \ uint32_t remote_TYPE_size = sizeof(TYPE) * (COUNT); /* TODO */ \
unsigned int local_TYPE_size = (COUNT) * sizeof(TYPE); \ uint32_t local_TYPE_size = (COUNT) * sizeof(TYPE); \
\ \
if( (remote_TYPE_size * count) > from_len ) { \ if( (remote_TYPE_size * count) > from_len ) { \
count = from_len / remote_TYPE_size; \ count = from_len / remote_TYPE_size; \
@ -416,22 +416,22 @@ static int copy_##TYPENAME( unsigned int count, \
return count; \ return count; \
} }
static int copy_bytes_1( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent ); static int copy_bytes_1( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
static int copy_bytes_2( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent ); static int copy_bytes_2( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
static int copy_bytes_4( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent ); static int copy_bytes_4( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
static int copy_bytes_8( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent ); static int copy_bytes_8( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
static int copy_bytes_12( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent ); static int copy_bytes_12( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
static int copy_bytes_16( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent ); static int copy_bytes_16( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
static int copy_bytes_20( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent ); static int copy_bytes_20( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
#define COPY_CONTIGUOUS_BYTES( TYPENAME, COUNT ) \ #define COPY_CONTIGUOUS_BYTES( TYPENAME, COUNT ) \
static int copy_##TYPENAME##_##COUNT( unsigned int count, \ static int copy_##TYPENAME##_##COUNT( uint32_t count, \
char* from, unsigned int from_len, long from_extent, \ char* from, uint32_t from_len, long from_extent, \
char* to, unsigned int to_len, long to_extent) \ char* to, uint32_t to_len, long to_extent) \
{ \ { \
unsigned int i; \ uint32_t i; \
unsigned int remote_TYPE_size = (COUNT); /* TODO */ \ uint32_t remote_TYPE_size = (COUNT); /* TODO */ \
unsigned int local_TYPE_size = (COUNT); \ uint32_t local_TYPE_size = (COUNT); \
\ \
if( (remote_TYPE_size * count) > from_len ) { \ if( (remote_TYPE_size * count) > from_len ) { \
count = from_len / remote_TYPE_size; \ count = from_len / remote_TYPE_size; \
@ -601,7 +601,7 @@ int ompi_convertor_need_buffers( ompi_convertor_t* pConvertor )
} }
extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED]; extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
int ompi_convertor_init_for_recv( ompi_convertor_t* pConv, unsigned int flags, int ompi_convertor_init_for_recv( ompi_convertor_t* pConv, uint32_t flags,
dt_desc_t* pData, int count, dt_desc_t* pData, int count,
void* pUserBuf, int starting_point, void* pUserBuf, int starting_point,
memalloc_fct_t allocfn ) memalloc_fct_t allocfn )

Просмотреть файл

@ -19,7 +19,6 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
dt_stack_t* pStack; /* pointer to the position on the stack */ dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */ int pos_desc; /* actual position in the description of the derived datatype */
int type, lastLength = 0; int type, lastLength = 0;
long totalDisp;
ompi_datatype_t* pData = pConvertor->pDesc; ompi_datatype_t* pData = pConvertor->pDesc;
int* remoteLength; int* remoteLength;
int loop_length; int loop_length;
@ -34,6 +33,12 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
*/ */
if( pConvertor->bConverted == (unsigned long)starting_point ) return OMPI_SUCCESS; if( pConvertor->bConverted == (unsigned long)starting_point ) return OMPI_SUCCESS;
/* do we provide more place than necessary for the data ? */
if( starting_point >= (int)(pConvertor->count * pData->size) ) {
pConvertor->bConverted = pConvertor->count * pData->size;
return OMPI_SUCCESS;
}
pConvertor->stack_pos = 0; pConvertor->stack_pos = 0;
pStack = pConvertor->pStack; pStack = pConvertor->pStack;
/* Fill the first position on the stack. This one correspond to the /* Fill the first position on the stack. This one correspond to the
@ -81,24 +86,32 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
pStack->disp = pElems[loop_length].disp; pStack->disp = pElems[loop_length].disp;
pos_desc = 0; pos_desc = 0;
remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] ); remoteLength = (int*)alloca( sizeof(int) * (pConvertor->pDesc->btypes[DT_LOOP] + 1));
remoteLength[0] = 0; /* initial value set to ZERO */ remoteLength[0] = 0; /* initial value set to ZERO */
/* The only way to get out of this loop is when we reach the desired position or
* when we finish the whole datatype.
*/
next_loop: next_loop:
totalDisp = pStack->disp;
loop_length = remoteLength[pConvertor->stack_pos]; loop_length = remoteLength[pConvertor->stack_pos];
while( pos_desc >= 0 ) { while( pos_desc >= 0 ) {
if( pElems->type == DT_END_LOOP ) { /* end of the current loop */ if( pElems->type == DT_END_LOOP ) { /* end of the current loop */
dt_endloop_desc_t* end_loop = (dt_endloop_desc_t*)pElems; dt_endloop_desc_t* end_loop = (dt_endloop_desc_t*)pElems;
long extent; long extent;
/* as we reach the end of the loop the count should be decreased by one */
pStack->count--;
/* now we know the length of the loop. We can compute /* now we know the length of the loop. We can compute
* if the the starting_position will happend in one of the * if the starting_position will happen in one of the
* iterations of this loop. * iterations of this loop.
*/ */
remoteLength[pConvertor->stack_pos] = loop_length; remoteLength[pConvertor->stack_pos] = loop_length;
if( (loop_length * pStack->count) > resting_place ) { if( (loop_length * pStack->count) > resting_place ) {
/* OK here we stop in this loop. First save the loop /* We will stop somewhere on this loop. To avoid moving inside the loop
* on the stack, then save the position of the last data * multiple times, we can compute the index of the loop where we will
* stop. Once this index is computed we can then reparse the loop once
* until we find the correct position.
*/ */
int cnt = resting_place / loop_length; int cnt = resting_place / loop_length;
if( pStack->index == -1 ) { if( pStack->index == -1 ) {
@ -110,27 +123,32 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
resting_place -= cnt * loop_length; resting_place -= cnt * loop_length;
pStack->disp += cnt * extent; pStack->disp += cnt * extent;
pos_desc -= end_loop->items; /* go back to the first element in the loop */ pos_desc -= end_loop->items; /* go back to the first element in the loop */
pElems = &(pData->desc.desc[pos_desc]);
goto next_loop; goto next_loop;
} }
/* Not in this loop. Cleanup the stack and advance to the /* Not in this loop. Cleanup the stack and advance to the
* next data description. * next data description.
*/ */
pConvertor->stack_pos--; loop_length *= pStack->count;
resting_place -= loop_length; /* update the resting place */
/* if we are embedded in another loop we should update its length too */
pStack--; pStack--;
pConvertor->stack_pos--;
if( pConvertor->stack_pos > 0 ) {
remoteLength[pConvertor->stack_pos] += loop_length;
}
pos_desc++; pos_desc++;
pElems++; pElems++;
goto next_loop; goto next_loop;
} }
if( pElems->type == DT_LOOP ) { if( pElems->type == DT_LOOP ) {
remoteLength[pConvertor->stack_pos + 1] = 0; remoteLength[pConvertor->stack_pos + 1] = 0;
totalDisp = pElems->disp;
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
pData->desc.desc[pos_desc].count, pData->desc.desc[pos_desc].count,
totalDisp, pos_desc + pElems->disp ); pStack->disp, pos_desc + pElems->disp );
pos_desc++; pos_desc++;
pElems++; pElems++;
loop_length = 0; /* starting a new loop */ loop_length = 0; /* starting a new loop */
goto next_loop;
} }
while( pElems->flags & DT_FLAG_DATA ) { while( pElems->flags & DT_FLAG_DATA ) {
/* now here we have a basic datatype */ /* now here we have a basic datatype */
@ -141,7 +159,7 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
resting_place -= cnt * ompi_ddt_basicDatatypes[type]->size; resting_place -= cnt * ompi_ddt_basicDatatypes[type]->size;
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
pElems->count - cnt, pElems->count - cnt,
totalDisp + pElems->disp + cnt * pElems->extent, pElems->disp + cnt * pElems->extent,
pos_desc ); pos_desc );
pConvertor->bConverted += (starting_point - resting_place); pConvertor->bConverted += (starting_point - resting_place);
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -152,9 +170,8 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
pElems++; pElems++;
} }
} }
PUSH_STACK( pStack, pConvertor->stack_pos, 0, 0, 0, 0 );
/* Correctly update the bConverted field */ /* Correctly update the bConverted field */
pConvertor->bConverted = starting_point - resting_place; pConvertor->bConverted = pData->size * pConvertor->count;
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }