1
1

A lot of improvements on the datatype. We still use the general version (BEWARE: not suitable for performance ...)

- have a standard usage of the stack between functions.
- correctly compute the stack if we provide an offset
- remove all u_int* references and replace them with uint*
- other small cleanups

This commit was SVN r3403.
Этот коммит содержится в:
George Bosilca 2004-10-28 19:45:19 +00:00
родитель d8294afea9
Коммит 44bcf51be6
7 изменённых файлов: 250 добавлений и 229 удалений

Просмотреть файл

@ -55,16 +55,16 @@ OMPI_DECLSPEC extern ompi_pointer_array_t *ompi_datatype_f_to_c_table;
* by a set of basic elements.
*/
typedef struct __dt_elem_desc {
u_int16_t flags; /**< flags for the record */
u_int16_t type; /**< the basic data type id */
u_int32_t count; /**< number of elements */
uint16_t flags; /**< flags for the record */
uint16_t type; /**< the basic data type id */
uint32_t count; /**< number of elements */
long disp; /**< displacement of the first element */
u_int32_t extent; /**< extent of each element */
uint32_t extent; /**< extent of each element */
} dt_elem_desc_t;
typedef struct __dt_struct_desc {
u_int32_t length; /* the maximum number of elements in the description array */
u_int32_t used; /* the number of used elements in the description array */
uint32_t length; /* the maximum number of elements in the description array */
uint32_t used; /* the number of used elements in the description array */
dt_elem_desc_t* desc;
} dt_type_desc_t;
@ -74,15 +74,15 @@ typedef struct ompi_datatype_t {
ompi_object_t super; /**< basic superclass */
unsigned long size; /**< total size in bytes of the memory used by the data if
* the data is put on a contiguous buffer */
u_int32_t align; /**< data should be aligned to */
uint32_t align; /**< data should be aligned to */
long true_lb;
long true_ub; /**< the true ub of the data without user defined lb and ub */
long lb; /**< lower bound in memory */
long ub; /**< upper bound in memory */
u_int16_t flags; /**< the flags */
u_int16_t id; /**< data id, normally the index in the data array. */
u_int32_t nbElems; /**< total number of elements inside the datatype */
u_int64_t bdt_used; /**< which basic datatypes are used in the data description */
uint16_t flags; /**< the flags */
uint16_t id; /**< data id, normally the index in the data array. */
uint32_t nbElems; /**< total number of elements inside the datatype */
uint64_t bdt_used; /**< which basic datatypes are used in the data description */
/* Attribute fields */
ompi_hash_table_t *d_keyhash;
@ -95,7 +95,7 @@ typedef struct ompi_datatype_t {
/* basic elements count used to compute the size of the datatype for
* remote nodes */
u_int32_t btypes[DT_MAX_PREDEFINED];
uint32_t btypes[DT_MAX_PREDEFINED];
} dt_desc_t, ompi_datatype_t;
OBJ_CLASS_DECLARATION( ompi_datatype_t );
@ -158,30 +158,30 @@ OMPI_DECLSPEC int ompi_ddt_optimize_short( dt_desc_t* pData, int count, dt_type_
/* flags for the datatypes */
typedef int (*conversion_fct_t)( unsigned int count,
void* from, unsigned int from_len, long from_extent,
void* to, unsigned int in_length, long to_extent );
void* from, uint32_t from_len, long from_extent,
void* to, uint32_t in_length, long to_extent );
typedef struct __dt_stack dt_stack_t;
typedef struct ompi_convertor_t ompi_convertor_t;
typedef int (*convertor_advance_fct_t)( ompi_convertor_t* pConvertor,
struct iovec* pInputv,
unsigned int* inputCount,
unsigned int* max_data,
int* freeAfter );
uint32_t* inputCount,
uint32_t* max_data,
int32_t* freeAfter );
typedef void*(*memalloc_fct_t)( size_t* pLength );
struct ompi_convertor_t {
ompi_object_t super; /**< basic superclass */
dt_desc_t* pDesc; /**< the datatype description associated with the convertor */
u_int32_t remoteArch; /**< the remote architecture */
uint32_t remoteArch; /**< the remote architecture */
dt_stack_t* pStack; /**< the local stack for the actual conversion */
u_int32_t converted; /**< the number of already converted elements */
u_int32_t bConverted; /**< the size of already converted elements in bytes */
u_int32_t flags; /**< the properties of this convertor */
u_int32_t count; /**< the total number of full datatype elements */
u_int32_t stack_pos; /**< the actual position on the stack */
uint32_t converted; /**< the number of already converted elements */
uint32_t bConverted; /**< the size of already converted elements in bytes */
uint32_t flags; /**< the properties of this convertor */
uint32_t count; /**< the total number of full datatype elements */
uint32_t stack_pos; /**< the actual position on the stack */
char* pBaseBuf; /**< initial buffer as supplied by the user */
u_int32_t available_space; /**< total available space */
uint32_t available_space; /**< total available space */
convertor_advance_fct_t fAdvance; /**< pointer to the pack/unpack functions */
memalloc_fct_t memAlloc_fn; /**< pointer to the memory allocation function */
conversion_fct_t* pFunctions; /**< the convertor functions pointer */

Просмотреть файл

@ -105,19 +105,19 @@ struct __dt_stack {
* for the name of the fields.
*/
typedef struct __dt_loop_desc {
u_int16_t flags; /**< flags for the record */
u_int16_t type; /**< the basic data type id */
u_int32_t loops; /**< number of times the loop have to be done */
uint16_t flags; /**< flags for the record */
uint16_t type; /**< the basic data type id */
uint32_t loops; /**< number of times the loop have to be done */
long items; /**< number of items in the loop */
u_int32_t extent; /**< extent of the whole loop */
uint32_t extent; /**< extent of the whole loop */
} dt_loop_desc_t;
typedef struct __dt_endloop_desc {
u_int16_t flags; /**< flags for the record */
u_int16_t type; /**< the basic data type id */
u_int32_t items; /**< number of items in the loop */
uint16_t flags; /**< flags for the record */
uint16_t type; /**< the basic data type id */
uint32_t items; /**< number of items in the loop */
long total_extent; /**< total extent of the loop taking in account the repetitions */
u_int32_t size; /**< real size of the data in the loop */
uint32_t size; /**< real size of the data in the loop */
} dt_endloop_desc_t;
/* keep the last 16 bits free for data flags */

Просмотреть файл

@ -21,7 +21,7 @@
int ompi_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd,
unsigned int count, long disp, long extent )
{
u_int32_t newLength, place_needed = 0, i;
uint32_t newLength, place_needed = 0, i;
short localFlags = 0; /* no specific options yet */
dt_elem_desc_t *pLast, *pLoop = NULL;
long lb, ub;

Просмотреть файл

@ -208,7 +208,7 @@ int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
free( (PDST)->opt_desc.desc ); \
(PDST)->opt_desc = (PSRC)->opt_desc; \
memcpy( (PDST)->btypes, (PSRC)->btypes, \
DT_MAX_PREDEFINED * sizeof(u_int32_t) ); \
DT_MAX_PREDEFINED * sizeof(uint32_t) ); \
} while(0)
#define DECLARE_MPI2_COMPOSED_STRUCT_DDT( PDATA, MPIDDT, MPIDDTNAME, type1, type2, MPIType1, MPIType2 ) \

Просмотреть файл

@ -12,15 +12,15 @@
static
int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
struct iovec* iov, unsigned int* out_size,
unsigned int* max_data,
int* freeAfter )
struct iovec* iov, uint32_t* out_size,
uint32_t* max_data,
int32_t* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
unsigned int pos_desc; /* actual position in the description of the derived datatype */
uint32_t pos_desc; /* actual position in the description of the derived datatype */
int count_desc; /* the number of items already done in the actual pos_desc */
int type; /* type at current position */
unsigned int advance; /* number of bytes that we should advance the buffer */
uint32_t advance; /* number of bytes that we should advance the buffer */
long disp_desc = 0; /* compute displacement for truncated data */
int bConverted = 0; /* number of bytes converted this time */
dt_desc_t *pData = pConvertor->pDesc;
@ -29,7 +29,7 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
int oCount = (pData->ub - pData->lb) * pConvertor->count;
char* pInput;
int iCount, rc;
unsigned int iov_count, total_bytes_converted = 0;
uint32_t iov_count, total_bytes_converted = 0;
DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", (void*)pConvertor,
iov[0].iov_base, iov[0].iov_len, *out_size );
@ -51,7 +51,7 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
bConverted = 0;
if( iov[iov_count].iov_base == NULL ) {
unsigned int length = iov[iov_count].iov_len;
uint32_t length = iov[iov_count].iov_len;
if( length <= 0 )
length = pConvertor->count * pData->size - pConvertor->bConverted - bConverted;
if( (*max_data) < length )
@ -140,17 +140,17 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
static
int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
struct iovec* iov,
unsigned int* out_size,
unsigned int* max_data,
uint32_t* out_size,
uint32_t* max_data,
int* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
u_int32_t pos_desc; /* actual position in the description of the derived datatype */
uint32_t pos_desc; /* actual position in the description of the derived datatype */
int type; /* type at current position */
int i; /* index for basic elements with extent */
int bConverted = 0; /* number of bytes converted/moved this time */
long lastDisp = 0, last_count = 0;
u_int32_t space = iov[0].iov_len, last_blength = 0;
uint32_t space = iov[0].iov_len, last_blength = 0;
char* pDestBuf;
dt_desc_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems;
@ -306,20 +306,20 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
* return the pointer to the contiguous piece of memory to the upper level.
*/
static
int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
struct iovec* iov,
unsigned int *out_size,
unsigned int* max_data,
uint32_t *out_size,
uint32_t* max_data,
int* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int i; /* index for basic elements with extent */
u_int32_t iov_pos = 0; /* index in the iovec that we put data inside */
uint32_t iov_pos = 0; /* index in the iovec that we put data inside */
int bConverted = 0; /* number of bytes converted/moved this time */
u_int32_t space_on_iovec; /* amount of free space on the current iovec */
uint32_t space_on_iovec; /* amount of free space on the current iovec */
long lastDisp = 0, last_count = 0;
u_int32_t space = *max_data, last_blength = 0, saveLength;
uint32_t space = *max_data, last_blength = 0, saveLength;
char *pDestBuf, *savePos;
dt_desc_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems;
@ -571,11 +571,14 @@ int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
return (pConv->bConverted == (pData->size * pConv->count));
}
/* The contig versions do not use the stack. They can easily retrieve
* their status using just the information in pConvertor->bConverted.
*/
static
int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
int ompi_convertor_pack_no_conversion_contig( ompi_convertor_t* pConv,
struct iovec* iov,
unsigned int* out_size,
unsigned int* max_data,
uint32_t* out_size,
uint32_t* max_data,
int* freeAfter )
{
dt_desc_t* pData = pConv->pDesc;
@ -584,8 +587,9 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
char* pDest;
size_t length = pData->size * pConv->count;
long extent;
u_int32_t max_allowed = *max_data;
u_int32_t i, index;
uint32_t max_allowed = *max_data;
uint32_t i, index;
uint32_t iov_count, total_bytes_converted = 0;
i = pConv->bConverted / pData->size; /* how many we already pack */
extent = pData->ub - pData->lb;
@ -595,20 +599,21 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
/* There are some optimizations that can be done if the upper level
* does not provide a buffer.
*/
if( iov[0].iov_base == NULL ) {
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
if( iov[iov_count].iov_base == NULL ) {
/* special case for small data. We avoid allocating memory if we
* can fill the iovec directly with the address of the remaining
* data.
*/
if( (pConv->count - i) < (*out_size) ) {
for( index = 0; i < pConv->count; i++, index++ ) {
if( (pConv->count - i) < ((*out_size) - iov_count) ) {
for( index = iov_count; i < pConv->count; i++, index++ ) {
iov[index].iov_base = pSrc;
iov[index].iov_len = pData->size;
pSrc += extent;
pConv->bConverted += pData->size;
}
*out_size = index;
*max_data = index * pData->size;
*out_size = iov_count + index;
*max_data = total_bytes_converted + index * pData->size;
return 1; /* we're done */
}
/* now special case for big contiguous data with gaps around */
@ -616,7 +621,7 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
/* as we dont have to copy any data, we can simply fill the iovecs
* with data from the user data description.
*/
for( index = 0; (i < pConv->count) && (index < (*out_size));
for( index = iov_count; (i < pConv->count) && (index < (*out_size));
i++, index++ ) {
if( max_allowed < pData->size ) {
iov[index].iov_base = pSrc;
@ -632,37 +637,37 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
max_allowed -= iov[index].iov_len;
}
*out_size = index;
*max_data = (*max_data) - max_allowed;
pConv->bConverted += (*max_data);
*max_data = total_bytes_converted + - max_allowed;
pConv->bConverted += total_bytes_converted;
return (pConv->bConverted == length );
}
}
if( (long)pData->size == extent ) { /* that really contiguous */
if( iov[0].iov_base == NULL ) {
iov[0].iov_base = pSrc; /* + pConv->bConverted; */
if( (pConv->bConverted + iov[0].iov_len) > length )
iov[0].iov_len = length - pConv->bConverted;
if( iov[iov_count].iov_base == NULL ) {
iov[iov_count].iov_base = pSrc; /* + pConv->bConverted; */
if( (pConv->bConverted + iov[iov_count].iov_len) > length )
iov[iov_count].iov_len = length - pConv->bConverted;
} else {
/* contiguous data just memcpy the smallest data in the user buffer */
iov[0].iov_len = IMIN( iov[0].iov_len, length );
OMPI_DDT_SAFEGUARD_POINTER( pSrc, iov[0].iov_len,
iov[iov_count].iov_len = IMIN( iov[iov_count].iov_len, length );
OMPI_DDT_SAFEGUARD_POINTER( pSrc, iov[iov_count].iov_len,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( iov[0].iov_base, pSrc, iov[0].iov_len);
MEMCPY( iov[iov_count].iov_base, pSrc, iov[iov_count].iov_len);
}
*max_data = iov[0].iov_len;
*max_data = iov[iov_count].iov_len;
} else {
u_int32_t done, counter;
uint32_t done, counter;
if( iov[0].iov_base == NULL ) {
iov[0].iov_base = pConv->memAlloc_fn( &(iov[0].iov_len) );
if( iov[iov_count].iov_base == NULL ) {
iov[iov_count].iov_base = pConv->memAlloc_fn( &(iov[iov_count].iov_len) );
(*freeAfter) |= (1 << 0);
if( max_allowed < iov[0].iov_len )
iov[0].iov_len = max_allowed;
if( max_allowed < iov[iov_count].iov_len )
iov[iov_count].iov_len = max_allowed;
else
max_allowed = iov[0].iov_len;
max_allowed = iov[iov_count].iov_len;
}
pDest = iov[0].iov_base;
pDest = iov[iov_count].iov_base;
done = pConv->bConverted - i * pData->size; /* how much data left last time */
pSrc += done;
if( done != 0 ) { /* still some data to copy from the last time */
@ -683,11 +688,13 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
pSrc += extent;
}
max_allowed -= (counter * pData->size);
*max_data = iov[0].iov_len - max_allowed;
iov[0].iov_len = *max_data;
total_bytes_converted += iov[iov_count].iov_len - max_allowed;
iov[iov_count].iov_len = *max_data;
}
pConv->bConverted += iov[0].iov_len;
*out_size = 1;
}
*max_data = total_bytes_converted;
pConv->bConverted += iov[iov_count].iov_len;
*out_size = iov_count;
return (pConv->bConverted == length);
}
@ -716,12 +723,12 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
*/
int ompi_convertor_pack( ompi_convertor_t* pConv,
struct iovec* iov,
unsigned int* out_size,
unsigned int* max_data,
uint32_t* out_size,
uint32_t* max_data,
int* freeAfter )
{
dt_desc_t* pData = pConv->pDesc;
u_int32_t done = 0, index = 0;
uint32_t done = 0, index = 0;
*freeAfter = 0; /* nothing to free yet */
/* TODO should use the remote size */
@ -762,7 +769,7 @@ int ompi_convertor_pack( ompi_convertor_t* pConv,
extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
int ompi_convertor_init_for_send( ompi_convertor_t* pConv,
unsigned int flags,
uint32_t flags,
dt_desc_t* dt,
int count,
void* pUserBuf,
@ -792,14 +799,11 @@ int ompi_convertor_init_for_send( ompi_convertor_t* pConv,
pConv->memAlloc_fn = allocfn;
if( dt->flags & DT_FLAG_CONTIGUOUS ) {
pConv->flags |= DT_FLAG_CONTIGUOUS | CONVERTOR_HOMOGENEOUS;
pConv->fAdvance = ompi_convertor_pack_homogeneous_contig;
pConv->fAdvance = ompi_convertor_pack_no_conversion_contig;
} else {
/* TODO handle the sender convert case */
pConv->fAdvance = ompi_convertor_pack_homogeneous_with_memcpy;
pConv->fAdvance = ompi_convertor_pack_homogeneous;
#if defined(ONE_STEP)
pConv->fAdvance = ompi_convertor_pack_homogeneous_with_memcpy;
#endif /* ONE_STEP */
pConv->fAdvance = ompi_convertor_pack_no_conversion_contig;
pConv->fAdvance = ompi_convertor_pack_no_conversion;
}
pConv->fAdvance = ompi_convertor_pack_general;
if( starting_pos != 0 ) {
@ -863,7 +867,7 @@ ompi_convertor_t* ompi_convertor_get_copy( ompi_convertor_t* pConvertor )
}
/* Actually we suppose that we can only do receiver side conversion */
int ompi_convertor_get_packed_size( ompi_convertor_t* pConv, unsigned int* pSize )
int ompi_convertor_get_packed_size( ompi_convertor_t* pConv, uint32_t* pSize )
{
int ddt_size = 0;
@ -874,7 +878,7 @@ int ompi_convertor_get_packed_size( ompi_convertor_t* pConv, unsigned int* pSize
return OMPI_SUCCESS;
}
int ompi_convertor_get_unpacked_size( ompi_convertor_t* pConv, unsigned int* pSize )
int ompi_convertor_get_unpacked_size( ompi_convertor_t* pConv, uint32_t* pSize )
{
int i;
dt_desc_t* pData = pConv->pDesc;

Просмотреть файл

@ -40,22 +40,22 @@ void ompi_ddt_dump_stack( dt_stack_t* pStack, int stack_pos, dt_elem_desc_t* pDe
*/
static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
struct iovec* iov,
unsigned int* out_size,
unsigned int* max_data,
int* freeAfter )
uint32_t* out_size,
uint32_t* max_data,
int32_t* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
unsigned int pos_desc; /* actual position in the description of the derived datatype */
uint32_t pos_desc; /* actual position in the description of the derived datatype */
int count_desc; /* the number of items already done in the actual pos_desc */
int type; /* type at current position */
unsigned int advance; /* number of bytes that we should advance the buffer */
uint32_t advance; /* number of bytes that we should advance the buffer */
long disp_desc = 0; /* compute displacement for truncated data */
int bConverted = 0; /* number of bytes converted this time */
dt_elem_desc_t* pElems;
int oCount = (pConvertor->pDesc->ub - pConvertor->pDesc->lb) * pConvertor->count;
char* pInput;
int iCount, rc;
unsigned int iov_count, total_bytes_converted = 0;
uint32_t iov_count, total_bytes_converted = 0;
/* For the general case always use the user data description */
pElems = pConvertor->pDesc->desc.desc;
@ -152,13 +152,13 @@ static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
struct iovec* iov,
unsigned int* out_size,
unsigned int* max_data,
int* freeAfter )
uint32_t* out_size,
uint32_t* max_data,
int32_t* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
unsigned int pos_desc; /* actual position in the description of the derived datatype */
unsigned int i; /* counter for basic datatype with extent */
uint32_t pos_desc; /* actual position in the description of the derived datatype */
uint32_t i; /* counter for basic datatype with extent */
int bConverted = 0; /* number of bytes converted this time */
long lastDisp = 0;
size_t space = iov[0].iov_len, last_count = 0, last_blength = 0;
@ -266,7 +266,7 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
bConverted += last_count;
lastDisp += last_count;
}
if( pos_desc < (unsigned int)pStack->end_loop ) { /* cleanup the stack */
if( pos_desc < (uint32_t)pStack->end_loop ) { /* cleanup the stack */
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_blength,
lastDisp, pos_desc );
}
@ -279,16 +279,16 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
struct iovec* iov,
unsigned int* out_size,
unsigned int* max_data,
int* freeAfter )
uint32_t* out_size,
uint32_t* max_data,
int32_t* freeAfter )
{
dt_desc_t *pData = pConv->pDesc;
char* pDstBuf = pConv->pBaseBuf;
char* pSrcBuf = iov[0].iov_base;
int bConverted = 0;
long extent = pData->ub - pData->lb;
unsigned int length, remaining, i;
uint32_t length, remaining, i;
dt_stack_t* stack = &(pConv->pStack[1]);
*out_size = 1;
@ -344,12 +344,12 @@ static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
int ompi_convertor_unpack( ompi_convertor_t* pConvertor,
struct iovec* iov,
unsigned int* out_size,
unsigned int* max_data,
int* freeAfter )
uint32_t* out_size,
uint32_t* max_data,
int32_t* freeAfter )
{
dt_desc_t *pData = pConvertor->pDesc;
unsigned int length;
uint32_t length;
*freeAfter = 0;
if( pConvertor->bConverted == (pData->size * pConvertor->count) ) {
@ -383,13 +383,13 @@ int ompi_convertor_unpack( ompi_convertor_t* pConvertor,
* basic datatype.
*/
#define COPY_TYPE( TYPENAME, TYPE, COUNT ) \
static int copy_##TYPENAME( unsigned int count, \
char* from, unsigned int from_len, long from_extent, \
char* to, unsigned int to_len, long to_extent ) \
static int copy_##TYPENAME( uint32_t count, \
char* from, uint32_t from_len, long from_extent, \
char* to, uint32_t to_len, long to_extent ) \
{ \
unsigned int i; \
unsigned int remote_TYPE_size = sizeof(TYPE) * (COUNT); /* TODO */ \
unsigned int local_TYPE_size = (COUNT) * sizeof(TYPE); \
uint32_t i; \
uint32_t remote_TYPE_size = sizeof(TYPE) * (COUNT); /* TODO */ \
uint32_t local_TYPE_size = (COUNT) * sizeof(TYPE); \
\
if( (remote_TYPE_size * count) > from_len ) { \
count = from_len / remote_TYPE_size; \
@ -416,22 +416,22 @@ static int copy_##TYPENAME( unsigned int count, \
return count; \
}
static int copy_bytes_1( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent );
static int copy_bytes_2( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent );
static int copy_bytes_4( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent );
static int copy_bytes_8( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent );
static int copy_bytes_12( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent );
static int copy_bytes_16( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent );
static int copy_bytes_20( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent );
static int copy_bytes_1( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
static int copy_bytes_2( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
static int copy_bytes_4( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
static int copy_bytes_8( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
static int copy_bytes_12( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
static int copy_bytes_16( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
static int copy_bytes_20( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
#define COPY_CONTIGUOUS_BYTES( TYPENAME, COUNT ) \
static int copy_##TYPENAME##_##COUNT( unsigned int count, \
char* from, unsigned int from_len, long from_extent, \
char* to, unsigned int to_len, long to_extent) \
static int copy_##TYPENAME##_##COUNT( uint32_t count, \
char* from, uint32_t from_len, long from_extent, \
char* to, uint32_t to_len, long to_extent) \
{ \
unsigned int i; \
unsigned int remote_TYPE_size = (COUNT); /* TODO */ \
unsigned int local_TYPE_size = (COUNT); \
uint32_t i; \
uint32_t remote_TYPE_size = (COUNT); /* TODO */ \
uint32_t local_TYPE_size = (COUNT); \
\
if( (remote_TYPE_size * count) > from_len ) { \
count = from_len / remote_TYPE_size; \
@ -601,7 +601,7 @@ int ompi_convertor_need_buffers( ompi_convertor_t* pConvertor )
}
extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
int ompi_convertor_init_for_recv( ompi_convertor_t* pConv, unsigned int flags,
int ompi_convertor_init_for_recv( ompi_convertor_t* pConv, uint32_t flags,
dt_desc_t* pData, int count,
void* pUserBuf, int starting_point,
memalloc_fct_t allocfn )

Просмотреть файл

@ -19,7 +19,6 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int type, lastLength = 0;
long totalDisp;
ompi_datatype_t* pData = pConvertor->pDesc;
int* remoteLength;
int loop_length;
@ -34,6 +33,12 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
*/
if( pConvertor->bConverted == (unsigned long)starting_point ) return OMPI_SUCCESS;
/* do we provide more place than necessary for the data ? */
if( starting_point >= (int)(pConvertor->count * pData->size) ) {
pConvertor->bConverted = pConvertor->count * pData->size;
return OMPI_SUCCESS;
}
pConvertor->stack_pos = 0;
pStack = pConvertor->pStack;
/* Fill the first position on the stack. This one correspond to the
@ -81,24 +86,32 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
pStack->disp = pElems[loop_length].disp;
pos_desc = 0;
remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] );
remoteLength = (int*)alloca( sizeof(int) * (pConvertor->pDesc->btypes[DT_LOOP] + 1));
remoteLength[0] = 0; /* initial value set to ZERO */
/* The only way to get out of this loop is when we reach the desired position or
* when we finish the whole datatype.
*/
next_loop:
totalDisp = pStack->disp;
loop_length = remoteLength[pConvertor->stack_pos];
while( pos_desc >= 0 ) {
if( pElems->type == DT_END_LOOP ) { /* end of the current loop */
dt_endloop_desc_t* end_loop = (dt_endloop_desc_t*)pElems;
long extent;
/* as we reach the end of the loop the count should be decreased by one */
pStack->count--;
/* now we know the length of the loop. We can compute
* if the the starting_position will happend in one of the
* if the starting_position will happend in one of the
* iterations of this loop.
*/
remoteLength[pConvertor->stack_pos] = loop_length;
if( (loop_length * pStack->count) > resting_place ) {
/* OK here we stop in this loop. First save the loop
* on the stack, then save the position of the last data
/* We will stop somewhere on this loop. To avoid moving inside the loop
* multiple times, we can compute the index of the loop where we will
* stop. Once this index is computed we can then reparse the loop once
* until we find the correct position.
*/
int cnt = resting_place / loop_length;
if( pStack->index == -1 ) {
@ -110,27 +123,32 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
resting_place -= cnt * loop_length;
pStack->disp += cnt * extent;
pos_desc -= end_loop->items; /* go back to the first element in the loop */
pElems = &(pData->desc.desc[pos_desc]);
goto next_loop;
}
/* Not in this loop. Cleanup the stack and advance to the
* next data description.
*/
pConvertor->stack_pos--;
loop_length *= pStack->count;
resting_place -= loop_length; /* update the resting place */
/* if we ae embedded in another loop we should update it's length too */
pStack--;
pConvertor->stack_pos--;
if( pConvertor->stack_pos > 0 ) {
remoteLength[pConvertor->stack_pos] += loop_length;
}
pos_desc++;
pElems++;
goto next_loop;
}
if( pElems->type == DT_LOOP ) {
remoteLength[pConvertor->stack_pos + 1] = 0;
totalDisp = pElems->disp;
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
pData->desc.desc[pos_desc].count,
totalDisp, pos_desc + pElems->disp );
pStack->disp, pos_desc + pElems->disp );
pos_desc++;
pElems++;
loop_length = 0; /* starting a new loop */
goto next_loop;
}
while( pElems->flags & DT_FLAG_DATA ) {
/* now here we have a basic datatype */
@ -141,7 +159,7 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
resting_place -= cnt * ompi_ddt_basicDatatypes[type]->size;
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
pElems->count - cnt,
totalDisp + pElems->disp + cnt * pElems->extent,
pElems->disp + cnt * pElems->extent,
pos_desc );
pConvertor->bConverted += (starting_point - resting_place);
return OMPI_SUCCESS;
@ -152,9 +170,8 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
pElems++;
}
}
PUSH_STACK( pStack, pConvertor->stack_pos, 0, 0, 0, 0 );
/* Correctly update the bConverted field */
pConvertor->bConverted = starting_point - resting_place;
pConvertor->bConverted = pData->size * pConvertor->count;
return OMPI_SUCCESS;
}