A lot of improvements to the datatype engine. We still use the general version (BEWARE: not suitable for performance ...)
- use the stack in a standard way across functions - correctly compute the stack if we provide an offset - remove all u_int* references and replace them with uint* - other small cleanups. This commit was SVN r3403.
Этот коммит содержится в:
родитель
d8294afea9
Коммит
44bcf51be6
@ -55,16 +55,16 @@ OMPI_DECLSPEC extern ompi_pointer_array_t *ompi_datatype_f_to_c_table;
|
|||||||
* by a set of basic elements.
|
* by a set of basic elements.
|
||||||
*/
|
*/
|
||||||
typedef struct __dt_elem_desc {
|
typedef struct __dt_elem_desc {
|
||||||
u_int16_t flags; /**< flags for the record */
|
uint16_t flags; /**< flags for the record */
|
||||||
u_int16_t type; /**< the basic data type id */
|
uint16_t type; /**< the basic data type id */
|
||||||
u_int32_t count; /**< number of elements */
|
uint32_t count; /**< number of elements */
|
||||||
long disp; /**< displacement of the first element */
|
long disp; /**< displacement of the first element */
|
||||||
u_int32_t extent; /**< extent of each element */
|
uint32_t extent; /**< extent of each element */
|
||||||
} dt_elem_desc_t;
|
} dt_elem_desc_t;
|
||||||
|
|
||||||
typedef struct __dt_struct_desc {
|
typedef struct __dt_struct_desc {
|
||||||
u_int32_t length; /* the maximum number of elements in the description array */
|
uint32_t length; /* the maximum number of elements in the description array */
|
||||||
u_int32_t used; /* the number of used elements in the description array */
|
uint32_t used; /* the number of used elements in the description array */
|
||||||
dt_elem_desc_t* desc;
|
dt_elem_desc_t* desc;
|
||||||
} dt_type_desc_t;
|
} dt_type_desc_t;
|
||||||
|
|
||||||
@ -74,15 +74,15 @@ typedef struct ompi_datatype_t {
|
|||||||
ompi_object_t super; /**< basic superclass */
|
ompi_object_t super; /**< basic superclass */
|
||||||
unsigned long size; /**< total size in bytes of the memory used by the data if
|
unsigned long size; /**< total size in bytes of the memory used by the data if
|
||||||
* the data is put on a contiguous buffer */
|
* the data is put on a contiguous buffer */
|
||||||
u_int32_t align; /**< data should be aligned to */
|
uint32_t align; /**< data should be aligned to */
|
||||||
long true_lb;
|
long true_lb;
|
||||||
long true_ub; /**< the true ub of the data without user defined lb and ub */
|
long true_ub; /**< the true ub of the data without user defined lb and ub */
|
||||||
long lb; /**< lower bound in memory */
|
long lb; /**< lower bound in memory */
|
||||||
long ub; /**< upper bound in memory */
|
long ub; /**< upper bound in memory */
|
||||||
u_int16_t flags; /**< the flags */
|
uint16_t flags; /**< the flags */
|
||||||
u_int16_t id; /**< data id, normally the index in the data array. */
|
uint16_t id; /**< data id, normally the index in the data array. */
|
||||||
u_int32_t nbElems; /**< total number of elements inside the datatype */
|
uint32_t nbElems; /**< total number of elements inside the datatype */
|
||||||
u_int64_t bdt_used; /**< which basic datatypes are used in the data description */
|
uint64_t bdt_used; /**< which basic datatypes are used in the data description */
|
||||||
|
|
||||||
/* Attribute fields */
|
/* Attribute fields */
|
||||||
ompi_hash_table_t *d_keyhash;
|
ompi_hash_table_t *d_keyhash;
|
||||||
@ -95,7 +95,7 @@ typedef struct ompi_datatype_t {
|
|||||||
|
|
||||||
/* basic elements count used to compute the size of the datatype for
|
/* basic elements count used to compute the size of the datatype for
|
||||||
* remote nodes */
|
* remote nodes */
|
||||||
u_int32_t btypes[DT_MAX_PREDEFINED];
|
uint32_t btypes[DT_MAX_PREDEFINED];
|
||||||
} dt_desc_t, ompi_datatype_t;
|
} dt_desc_t, ompi_datatype_t;
|
||||||
|
|
||||||
OBJ_CLASS_DECLARATION( ompi_datatype_t );
|
OBJ_CLASS_DECLARATION( ompi_datatype_t );
|
||||||
@ -116,23 +116,23 @@ void ompi_ddt_dump( dt_desc_t* pData );
|
|||||||
OMPI_DECLSPEC int ompi_ddt_duplicate( dt_desc_t* oldType, dt_desc_t** newType );
|
OMPI_DECLSPEC int ompi_ddt_duplicate( dt_desc_t* oldType, dt_desc_t** newType );
|
||||||
OMPI_DECLSPEC int ompi_ddt_create_contiguous( int count, dt_desc_t* oldType, dt_desc_t** newType );
|
OMPI_DECLSPEC int ompi_ddt_create_contiguous( int count, dt_desc_t* oldType, dt_desc_t** newType );
|
||||||
OMPI_DECLSPEC int ompi_ddt_create_vector( int count, int bLength, long stride,
|
OMPI_DECLSPEC int ompi_ddt_create_vector( int count, int bLength, long stride,
|
||||||
dt_desc_t* oldType, dt_desc_t** newType );
|
dt_desc_t* oldType, dt_desc_t** newType );
|
||||||
OMPI_DECLSPEC int ompi_ddt_create_hvector( int count, int bLength, long stride,
|
OMPI_DECLSPEC int ompi_ddt_create_hvector( int count, int bLength, long stride,
|
||||||
dt_desc_t* oldType, dt_desc_t** newType );
|
dt_desc_t* oldType, dt_desc_t** newType );
|
||||||
OMPI_DECLSPEC int ompi_ddt_create_indexed( int count, int* pBlockLength, int* pDisp,
|
OMPI_DECLSPEC int ompi_ddt_create_indexed( int count, int* pBlockLength, int* pDisp,
|
||||||
dt_desc_t* oldType, dt_desc_t** newType );
|
dt_desc_t* oldType, dt_desc_t** newType );
|
||||||
OMPI_DECLSPEC int ompi_ddt_create_hindexed( int count, int* pBlockLength, long* pDisp,
|
OMPI_DECLSPEC int ompi_ddt_create_hindexed( int count, int* pBlockLength, long* pDisp,
|
||||||
dt_desc_t* oldType, dt_desc_t** newType );
|
dt_desc_t* oldType, dt_desc_t** newType );
|
||||||
OMPI_DECLSPEC int ompi_ddt_create_indexed_block( int count, int bLength, int* pDisp,
|
OMPI_DECLSPEC int ompi_ddt_create_indexed_block( int count, int bLength, int* pDisp,
|
||||||
dt_desc_t* oldType, dt_desc_t** newType );
|
dt_desc_t* oldType, dt_desc_t** newType );
|
||||||
OMPI_DECLSPEC int ompi_ddt_create_struct( int count, int* pBlockLength, long* pDisp,
|
OMPI_DECLSPEC int ompi_ddt_create_struct( int count, int* pBlockLength, long* pDisp,
|
||||||
dt_desc_t** pTypes, dt_desc_t** newType );
|
dt_desc_t** pTypes, dt_desc_t** newType );
|
||||||
OMPI_DECLSPEC int ompi_ddt_create_resized( dt_desc_t* oldType, long lb, long extent, dt_desc_t** newType );
|
OMPI_DECLSPEC int ompi_ddt_create_resized( dt_desc_t* oldType, long lb, long extent, dt_desc_t** newType );
|
||||||
OMPI_DECLSPEC int ompi_ddt_create_subarray( int ndims, int* pSizes, int* pSubSizes, int* pStarts,
|
OMPI_DECLSPEC int ompi_ddt_create_subarray( int ndims, int* pSizes, int* pSubSizes, int* pStarts,
|
||||||
int order, dt_desc_t* oldType, dt_desc_t** newType );
|
int order, dt_desc_t* oldType, dt_desc_t** newType );
|
||||||
OMPI_DECLSPEC int ompi_ddt_create_darray( int size, int rank, int ndims, int* pGSizes, int *pDistrib,
|
OMPI_DECLSPEC int ompi_ddt_create_darray( int size, int rank, int ndims, int* pGSizes, int *pDistrib,
|
||||||
int* pDArgs, int* pPSizes, int order, dt_desc_t* oldType,
|
int* pDArgs, int* pPSizes, int order, dt_desc_t* oldType,
|
||||||
dt_desc_t** newType );
|
dt_desc_t** newType );
|
||||||
|
|
||||||
OMPI_DECLSPEC int ompi_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtNew, unsigned int count, long disp, long extent );
|
OMPI_DECLSPEC int ompi_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtNew, unsigned int count, long disp, long extent );
|
||||||
|
|
||||||
@ -158,30 +158,30 @@ OMPI_DECLSPEC int ompi_ddt_optimize_short( dt_desc_t* pData, int count, dt_type_
|
|||||||
/* flags for the datatypes */
|
/* flags for the datatypes */
|
||||||
|
|
||||||
typedef int (*conversion_fct_t)( unsigned int count,
|
typedef int (*conversion_fct_t)( unsigned int count,
|
||||||
void* from, unsigned int from_len, long from_extent,
|
void* from, uint32_t from_len, long from_extent,
|
||||||
void* to, unsigned int in_length, long to_extent );
|
void* to, uint32_t in_length, long to_extent );
|
||||||
|
|
||||||
typedef struct __dt_stack dt_stack_t;
|
typedef struct __dt_stack dt_stack_t;
|
||||||
typedef struct ompi_convertor_t ompi_convertor_t;
|
typedef struct ompi_convertor_t ompi_convertor_t;
|
||||||
typedef int (*convertor_advance_fct_t)( ompi_convertor_t* pConvertor,
|
typedef int (*convertor_advance_fct_t)( ompi_convertor_t* pConvertor,
|
||||||
struct iovec* pInputv,
|
struct iovec* pInputv,
|
||||||
unsigned int* inputCount,
|
uint32_t* inputCount,
|
||||||
unsigned int* max_data,
|
uint32_t* max_data,
|
||||||
int* freeAfter );
|
int32_t* freeAfter );
|
||||||
typedef void*(*memalloc_fct_t)( size_t* pLength );
|
typedef void*(*memalloc_fct_t)( size_t* pLength );
|
||||||
|
|
||||||
struct ompi_convertor_t {
|
struct ompi_convertor_t {
|
||||||
ompi_object_t super; /**< basic superclass */
|
ompi_object_t super; /**< basic superclass */
|
||||||
dt_desc_t* pDesc; /**< the datatype description associated with the convertor */
|
dt_desc_t* pDesc; /**< the datatype description associated with the convertor */
|
||||||
u_int32_t remoteArch; /**< the remote architecture */
|
uint32_t remoteArch; /**< the remote architecture */
|
||||||
dt_stack_t* pStack; /**< the local stack for the actual conversion */
|
dt_stack_t* pStack; /**< the local stack for the actual conversion */
|
||||||
u_int32_t converted; /**< the number of already converted elements */
|
uint32_t converted; /**< the number of already converted elements */
|
||||||
u_int32_t bConverted; /**< the size of already converted elements in bytes */
|
uint32_t bConverted; /**< the size of already converted elements in bytes */
|
||||||
u_int32_t flags; /**< the properties of this convertor */
|
uint32_t flags; /**< the properties of this convertor */
|
||||||
u_int32_t count; /**< the total number of full datatype elements */
|
uint32_t count; /**< the total number of full datatype elements */
|
||||||
u_int32_t stack_pos; /**< the actual position on the stack */
|
uint32_t stack_pos; /**< the actual position on the stack */
|
||||||
char* pBaseBuf; /**< initial buffer as supplied by the user */
|
char* pBaseBuf; /**< initial buffer as supplied by the user */
|
||||||
u_int32_t available_space; /**< total available space */
|
uint32_t available_space; /**< total available space */
|
||||||
convertor_advance_fct_t fAdvance; /**< pointer to the pack/unpack functions */
|
convertor_advance_fct_t fAdvance; /**< pointer to the pack/unpack functions */
|
||||||
memalloc_fct_t memAlloc_fn; /**< pointer to the memory allocation function */
|
memalloc_fct_t memAlloc_fn; /**< pointer to the memory allocation function */
|
||||||
conversion_fct_t* pFunctions; /**< the convertor functions pointer */
|
conversion_fct_t* pFunctions; /**< the convertor functions pointer */
|
||||||
|
@ -105,19 +105,19 @@ struct __dt_stack {
|
|||||||
* for the name of the fields.
|
* for the name of the fields.
|
||||||
*/
|
*/
|
||||||
typedef struct __dt_loop_desc {
|
typedef struct __dt_loop_desc {
|
||||||
u_int16_t flags; /**< flags for the record */
|
uint16_t flags; /**< flags for the record */
|
||||||
u_int16_t type; /**< the basic data type id */
|
uint16_t type; /**< the basic data type id */
|
||||||
u_int32_t loops; /**< number of times the loop have to be done */
|
uint32_t loops; /**< number of times the loop have to be done */
|
||||||
long items; /**< number of items in the loop */
|
long items; /**< number of items in the loop */
|
||||||
u_int32_t extent; /**< extent of the whole loop */
|
uint32_t extent; /**< extent of the whole loop */
|
||||||
} dt_loop_desc_t;
|
} dt_loop_desc_t;
|
||||||
|
|
||||||
typedef struct __dt_endloop_desc {
|
typedef struct __dt_endloop_desc {
|
||||||
u_int16_t flags; /**< flags for the record */
|
uint16_t flags; /**< flags for the record */
|
||||||
u_int16_t type; /**< the basic data type id */
|
uint16_t type; /**< the basic data type id */
|
||||||
u_int32_t items; /**< number of items in the loop */
|
uint32_t items; /**< number of items in the loop */
|
||||||
long total_extent; /**< total extent of the loop taking in account the repetitions */
|
long total_extent; /**< total extent of the loop taking in account the repetitions */
|
||||||
u_int32_t size; /**< real size of the data in the loop */
|
uint32_t size; /**< real size of the data in the loop */
|
||||||
} dt_endloop_desc_t;
|
} dt_endloop_desc_t;
|
||||||
|
|
||||||
/* keep the last 16 bits free for data flags */
|
/* keep the last 16 bits free for data flags */
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
int ompi_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd,
|
int ompi_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd,
|
||||||
unsigned int count, long disp, long extent )
|
unsigned int count, long disp, long extent )
|
||||||
{
|
{
|
||||||
u_int32_t newLength, place_needed = 0, i;
|
uint32_t newLength, place_needed = 0, i;
|
||||||
short localFlags = 0; /* no specific options yet */
|
short localFlags = 0; /* no specific options yet */
|
||||||
dt_elem_desc_t *pLast, *pLoop = NULL;
|
dt_elem_desc_t *pLast, *pLoop = NULL;
|
||||||
long lb, ub;
|
long lb, ub;
|
||||||
|
@ -208,7 +208,7 @@ int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
|
|||||||
free( (PDST)->opt_desc.desc ); \
|
free( (PDST)->opt_desc.desc ); \
|
||||||
(PDST)->opt_desc = (PSRC)->opt_desc; \
|
(PDST)->opt_desc = (PSRC)->opt_desc; \
|
||||||
memcpy( (PDST)->btypes, (PSRC)->btypes, \
|
memcpy( (PDST)->btypes, (PSRC)->btypes, \
|
||||||
DT_MAX_PREDEFINED * sizeof(u_int32_t) ); \
|
DT_MAX_PREDEFINED * sizeof(uint32_t) ); \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
#define DECLARE_MPI2_COMPOSED_STRUCT_DDT( PDATA, MPIDDT, MPIDDTNAME, type1, type2, MPIType1, MPIType2 ) \
|
#define DECLARE_MPI2_COMPOSED_STRUCT_DDT( PDATA, MPIDDT, MPIDDTNAME, type1, type2, MPIType1, MPIType2 ) \
|
||||||
|
@ -12,15 +12,15 @@
|
|||||||
|
|
||||||
static
|
static
|
||||||
int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
|
int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
|
||||||
struct iovec* iov, unsigned int* out_size,
|
struct iovec* iov, uint32_t* out_size,
|
||||||
unsigned int* max_data,
|
uint32_t* max_data,
|
||||||
int* freeAfter )
|
int32_t* freeAfter )
|
||||||
{
|
{
|
||||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||||
unsigned int pos_desc; /* actual position in the description of the derived datatype */
|
uint32_t pos_desc; /* actual position in the description of the derived datatype */
|
||||||
int count_desc; /* the number of items already done in the actual pos_desc */
|
int count_desc; /* the number of items already done in the actual pos_desc */
|
||||||
int type; /* type at current position */
|
int type; /* type at current position */
|
||||||
unsigned int advance; /* number of bytes that we should advance the buffer */
|
uint32_t advance; /* number of bytes that we should advance the buffer */
|
||||||
long disp_desc = 0; /* compute displacement for truncated data */
|
long disp_desc = 0; /* compute displacement for truncated data */
|
||||||
int bConverted = 0; /* number of bytes converted this time */
|
int bConverted = 0; /* number of bytes converted this time */
|
||||||
dt_desc_t *pData = pConvertor->pDesc;
|
dt_desc_t *pData = pConvertor->pDesc;
|
||||||
@ -29,7 +29,7 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
|
|||||||
int oCount = (pData->ub - pData->lb) * pConvertor->count;
|
int oCount = (pData->ub - pData->lb) * pConvertor->count;
|
||||||
char* pInput;
|
char* pInput;
|
||||||
int iCount, rc;
|
int iCount, rc;
|
||||||
unsigned int iov_count, total_bytes_converted = 0;
|
uint32_t iov_count, total_bytes_converted = 0;
|
||||||
|
|
||||||
DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", (void*)pConvertor,
|
DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", (void*)pConvertor,
|
||||||
iov[0].iov_base, iov[0].iov_len, *out_size );
|
iov[0].iov_base, iov[0].iov_len, *out_size );
|
||||||
@ -51,7 +51,7 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
|
|||||||
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
|
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
|
||||||
bConverted = 0;
|
bConverted = 0;
|
||||||
if( iov[iov_count].iov_base == NULL ) {
|
if( iov[iov_count].iov_base == NULL ) {
|
||||||
unsigned int length = iov[iov_count].iov_len;
|
uint32_t length = iov[iov_count].iov_len;
|
||||||
if( length <= 0 )
|
if( length <= 0 )
|
||||||
length = pConvertor->count * pData->size - pConvertor->bConverted - bConverted;
|
length = pConvertor->count * pData->size - pConvertor->bConverted - bConverted;
|
||||||
if( (*max_data) < length )
|
if( (*max_data) < length )
|
||||||
@ -140,17 +140,17 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
|
|||||||
static
|
static
|
||||||
int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
|
int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
|
||||||
struct iovec* iov,
|
struct iovec* iov,
|
||||||
unsigned int* out_size,
|
uint32_t* out_size,
|
||||||
unsigned int* max_data,
|
uint32_t* max_data,
|
||||||
int* freeAfter )
|
int* freeAfter )
|
||||||
{
|
{
|
||||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||||
u_int32_t pos_desc; /* actual position in the description of the derived datatype */
|
uint32_t pos_desc; /* actual position in the description of the derived datatype */
|
||||||
int type; /* type at current position */
|
int type; /* type at current position */
|
||||||
int i; /* index for basic elements with extent */
|
int i; /* index for basic elements with extent */
|
||||||
int bConverted = 0; /* number of bytes converted/moved this time */
|
int bConverted = 0; /* number of bytes converted/moved this time */
|
||||||
long lastDisp = 0, last_count = 0;
|
long lastDisp = 0, last_count = 0;
|
||||||
u_int32_t space = iov[0].iov_len, last_blength = 0;
|
uint32_t space = iov[0].iov_len, last_blength = 0;
|
||||||
char* pDestBuf;
|
char* pDestBuf;
|
||||||
dt_desc_t* pData = pConv->pDesc;
|
dt_desc_t* pData = pConv->pDesc;
|
||||||
dt_elem_desc_t* pElems;
|
dt_elem_desc_t* pElems;
|
||||||
@ -306,20 +306,20 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
|
|||||||
* return the pointer to the contiguous piece of memory to the upper level.
|
* return the pointer to the contiguous piece of memory to the upper level.
|
||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
|
int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
|
||||||
struct iovec* iov,
|
struct iovec* iov,
|
||||||
unsigned int *out_size,
|
uint32_t *out_size,
|
||||||
unsigned int* max_data,
|
uint32_t* max_data,
|
||||||
int* freeAfter )
|
int* freeAfter )
|
||||||
{
|
{
|
||||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||||
int pos_desc; /* actual position in the description of the derived datatype */
|
int pos_desc; /* actual position in the description of the derived datatype */
|
||||||
int i; /* index for basic elements with extent */
|
int i; /* index for basic elements with extent */
|
||||||
u_int32_t iov_pos = 0; /* index in the iovec that we put data inside */
|
uint32_t iov_pos = 0; /* index in the iovec that we put data inside */
|
||||||
int bConverted = 0; /* number of bytes converted/moved this time */
|
int bConverted = 0; /* number of bytes converted/moved this time */
|
||||||
u_int32_t space_on_iovec; /* amount of free space on the current iovec */
|
uint32_t space_on_iovec; /* amount of free space on the current iovec */
|
||||||
long lastDisp = 0, last_count = 0;
|
long lastDisp = 0, last_count = 0;
|
||||||
u_int32_t space = *max_data, last_blength = 0, saveLength;
|
uint32_t space = *max_data, last_blength = 0, saveLength;
|
||||||
char *pDestBuf, *savePos;
|
char *pDestBuf, *savePos;
|
||||||
dt_desc_t* pData = pConv->pDesc;
|
dt_desc_t* pData = pConv->pDesc;
|
||||||
dt_elem_desc_t* pElems;
|
dt_elem_desc_t* pElems;
|
||||||
@ -571,12 +571,15 @@ int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
|
|||||||
return (pConv->bConverted == (pData->size * pConv->count));
|
return (pConv->bConverted == (pData->size * pConv->count));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* the Contig versions does not use the stack. They can easily retrieve
|
||||||
|
* the status with just the informations from pConvertor->bConverted.
|
||||||
|
*/
|
||||||
static
|
static
|
||||||
int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
|
int ompi_convertor_pack_no_conversion_contig( ompi_convertor_t* pConv,
|
||||||
struct iovec* iov,
|
struct iovec* iov,
|
||||||
unsigned int* out_size,
|
uint32_t* out_size,
|
||||||
unsigned int* max_data,
|
uint32_t* max_data,
|
||||||
int* freeAfter )
|
int* freeAfter )
|
||||||
{
|
{
|
||||||
dt_desc_t* pData = pConv->pDesc;
|
dt_desc_t* pData = pConv->pDesc;
|
||||||
char* pSrc = pConv->pBaseBuf + pData->true_lb;
|
char* pSrc = pConv->pBaseBuf + pData->true_lb;
|
||||||
@ -584,8 +587,9 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
|
|||||||
char* pDest;
|
char* pDest;
|
||||||
size_t length = pData->size * pConv->count;
|
size_t length = pData->size * pConv->count;
|
||||||
long extent;
|
long extent;
|
||||||
u_int32_t max_allowed = *max_data;
|
uint32_t max_allowed = *max_data;
|
||||||
u_int32_t i, index;
|
uint32_t i, index;
|
||||||
|
uint32_t iov_count, total_bytes_converted = 0;
|
||||||
|
|
||||||
i = pConv->bConverted / pData->size; /* how many we already pack */
|
i = pConv->bConverted / pData->size; /* how many we already pack */
|
||||||
extent = pData->ub - pData->lb;
|
extent = pData->ub - pData->lb;
|
||||||
@ -595,99 +599,102 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
|
|||||||
/* There are some optimizations that can be done if the upper level
|
/* There are some optimizations that can be done if the upper level
|
||||||
* does not provide a buffer.
|
* does not provide a buffer.
|
||||||
*/
|
*/
|
||||||
if( iov[0].iov_base == NULL ) {
|
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
|
||||||
/* special case for small data. We avoid allocating memory if we
|
if( iov[iov_count].iov_base == NULL ) {
|
||||||
* can fill the iovec directly with the address of the remaining
|
/* special case for small data. We avoid allocating memory if we
|
||||||
* data.
|
* can fill the iovec directly with the address of the remaining
|
||||||
*/
|
* data.
|
||||||
if( (pConv->count - i) < (*out_size) ) {
|
*/
|
||||||
for( index = 0; i < pConv->count; i++, index++ ) {
|
if( (pConv->count - i) < ((*out_size) - iov_count) ) {
|
||||||
iov[index].iov_base = pSrc;
|
for( index = iov_count; i < pConv->count; i++, index++ ) {
|
||||||
iov[index].iov_len = pData->size;
|
iov[index].iov_base = pSrc;
|
||||||
pSrc += extent;
|
iov[index].iov_len = pData->size;
|
||||||
pConv->bConverted += pData->size;
|
pSrc += extent;
|
||||||
}
|
pConv->bConverted += pData->size;
|
||||||
*out_size = index;
|
}
|
||||||
*max_data = index * pData->size;
|
*out_size = iov_count + index;
|
||||||
return 1; /* we're done */
|
*max_data = total_bytes_converted + index * pData->size;
|
||||||
}
|
return 1; /* we're done */
|
||||||
/* now special case for big contiguous data with gaps around */
|
}
|
||||||
if( pData->size >= IOVEC_MEM_LIMIT ) {
|
/* now special case for big contiguous data with gaps around */
|
||||||
/* as we dont have to copy any data, we can simply fill the iovecs
|
if( pData->size >= IOVEC_MEM_LIMIT ) {
|
||||||
* with data from the user data description.
|
/* as we dont have to copy any data, we can simply fill the iovecs
|
||||||
*/
|
* with data from the user data description.
|
||||||
for( index = 0; (i < pConv->count) && (index < (*out_size));
|
*/
|
||||||
i++, index++ ) {
|
for( index = iov_count; (i < pConv->count) && (index < (*out_size));
|
||||||
if( max_allowed < pData->size ) {
|
i++, index++ ) {
|
||||||
iov[index].iov_base = pSrc;
|
if( max_allowed < pData->size ) {
|
||||||
iov[index].iov_len = max_allowed;
|
iov[index].iov_base = pSrc;
|
||||||
max_allowed = 0;
|
iov[index].iov_len = max_allowed;
|
||||||
printf( "%s:%d Possible problem here\n", __FILE__, __LINE__ );
|
max_allowed = 0;
|
||||||
break;
|
printf( "%s:%d Possible problem here\n", __FILE__, __LINE__ );
|
||||||
} else {
|
break;
|
||||||
iov[index].iov_base = pSrc;
|
} else {
|
||||||
iov[index].iov_len = pData->size;
|
iov[index].iov_base = pSrc;
|
||||||
pSrc += extent;
|
iov[index].iov_len = pData->size;
|
||||||
}
|
pSrc += extent;
|
||||||
max_allowed -= iov[index].iov_len;
|
}
|
||||||
}
|
max_allowed -= iov[index].iov_len;
|
||||||
*out_size = index;
|
}
|
||||||
*max_data = (*max_data) - max_allowed;
|
*out_size = index;
|
||||||
pConv->bConverted += (*max_data);
|
*max_data = total_bytes_converted + - max_allowed;
|
||||||
return (pConv->bConverted == length );
|
pConv->bConverted += total_bytes_converted;
|
||||||
}
|
return (pConv->bConverted == length );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( (long)pData->size == extent ) { /* that really contiguous */
|
||||||
|
if( iov[iov_count].iov_base == NULL ) {
|
||||||
|
iov[iov_count].iov_base = pSrc; /* + pConv->bConverted; */
|
||||||
|
if( (pConv->bConverted + iov[iov_count].iov_len) > length )
|
||||||
|
iov[iov_count].iov_len = length - pConv->bConverted;
|
||||||
|
} else {
|
||||||
|
/* contiguous data just memcpy the smallest data in the user buffer */
|
||||||
|
iov[iov_count].iov_len = IMIN( iov[iov_count].iov_len, length );
|
||||||
|
OMPI_DDT_SAFEGUARD_POINTER( pSrc, iov[iov_count].iov_len,
|
||||||
|
pConv->pBaseBuf, pData, pConv->count );
|
||||||
|
MEMCPY( iov[iov_count].iov_base, pSrc, iov[iov_count].iov_len);
|
||||||
|
}
|
||||||
|
*max_data = iov[iov_count].iov_len;
|
||||||
|
} else {
|
||||||
|
uint32_t done, counter;
|
||||||
|
|
||||||
|
if( iov[iov_count].iov_base == NULL ) {
|
||||||
|
iov[iov_count].iov_base = pConv->memAlloc_fn( &(iov[iov_count].iov_len) );
|
||||||
|
(*freeAfter) |= (1 << 0);
|
||||||
|
if( max_allowed < iov[iov_count].iov_len )
|
||||||
|
iov[iov_count].iov_len = max_allowed;
|
||||||
|
else
|
||||||
|
max_allowed = iov[iov_count].iov_len;
|
||||||
|
}
|
||||||
|
pDest = iov[iov_count].iov_base;
|
||||||
|
done = pConv->bConverted - i * pData->size; /* how much data left last time */
|
||||||
|
pSrc += done;
|
||||||
|
if( done != 0 ) { /* still some data to copy from the last time */
|
||||||
|
done = pData->size - done;
|
||||||
|
OMPI_DDT_SAFEGUARD_POINTER( pSrc, done, pConv->pBaseBuf, pData, pConv->count );
|
||||||
|
MEMCPY( pDest, pSrc, done );
|
||||||
|
pDest += done;
|
||||||
|
max_allowed -= done;
|
||||||
|
i++; /* just to compute the correct source pointer */
|
||||||
|
}
|
||||||
|
pSrc = pConv->pBaseBuf + pData->true_lb + i * extent;
|
||||||
|
counter = max_allowed / pData->size;
|
||||||
|
if( counter > pConv->count ) counter = pConv->count;
|
||||||
|
for( i = 0; i < counter; i++ ) {
|
||||||
|
OMPI_DDT_SAFEGUARD_POINTER( pSrc, pData->size, pConv->pBaseBuf, pData, pConv->count );
|
||||||
|
MEMCPY( pDest, pSrc, pData->size );
|
||||||
|
pDest += pData->size;
|
||||||
|
pSrc += extent;
|
||||||
|
}
|
||||||
|
max_allowed -= (counter * pData->size);
|
||||||
|
total_bytes_converted += iov[iov_count].iov_len - max_allowed;
|
||||||
|
iov[iov_count].iov_len = *max_data;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
*max_data = total_bytes_converted;
|
||||||
if( (long)pData->size == extent ) { /* that really contiguous */
|
pConv->bConverted += iov[iov_count].iov_len;
|
||||||
if( iov[0].iov_base == NULL ) {
|
*out_size = iov_count;
|
||||||
iov[0].iov_base = pSrc; /* + pConv->bConverted; */
|
|
||||||
if( (pConv->bConverted + iov[0].iov_len) > length )
|
|
||||||
iov[0].iov_len = length - pConv->bConverted;
|
|
||||||
} else {
|
|
||||||
/* contiguous data just memcpy the smallest data in the user buffer */
|
|
||||||
iov[0].iov_len = IMIN( iov[0].iov_len, length );
|
|
||||||
OMPI_DDT_SAFEGUARD_POINTER( pSrc, iov[0].iov_len,
|
|
||||||
pConv->pBaseBuf, pData, pConv->count );
|
|
||||||
MEMCPY( iov[0].iov_base, pSrc, iov[0].iov_len);
|
|
||||||
}
|
|
||||||
*max_data = iov[0].iov_len;
|
|
||||||
} else {
|
|
||||||
u_int32_t done, counter;
|
|
||||||
|
|
||||||
if( iov[0].iov_base == NULL ) {
|
|
||||||
iov[0].iov_base = pConv->memAlloc_fn( &(iov[0].iov_len) );
|
|
||||||
(*freeAfter) |= (1 << 0);
|
|
||||||
if( max_allowed < iov[0].iov_len )
|
|
||||||
iov[0].iov_len = max_allowed;
|
|
||||||
else
|
|
||||||
max_allowed = iov[0].iov_len;
|
|
||||||
}
|
|
||||||
pDest = iov[0].iov_base;
|
|
||||||
done = pConv->bConverted - i * pData->size; /* how much data left last time */
|
|
||||||
pSrc += done;
|
|
||||||
if( done != 0 ) { /* still some data to copy from the last time */
|
|
||||||
done = pData->size - done;
|
|
||||||
OMPI_DDT_SAFEGUARD_POINTER( pSrc, done, pConv->pBaseBuf, pData, pConv->count );
|
|
||||||
MEMCPY( pDest, pSrc, done );
|
|
||||||
pDest += done;
|
|
||||||
max_allowed -= done;
|
|
||||||
i++; /* just to compute the correct source pointer */
|
|
||||||
}
|
|
||||||
pSrc = pConv->pBaseBuf + pData->true_lb + i * extent;
|
|
||||||
counter = max_allowed / pData->size;
|
|
||||||
if( counter > pConv->count ) counter = pConv->count;
|
|
||||||
for( i = 0; i < counter; i++ ) {
|
|
||||||
OMPI_DDT_SAFEGUARD_POINTER( pSrc, pData->size, pConv->pBaseBuf, pData, pConv->count );
|
|
||||||
MEMCPY( pDest, pSrc, pData->size );
|
|
||||||
pDest += pData->size;
|
|
||||||
pSrc += extent;
|
|
||||||
}
|
|
||||||
max_allowed -= (counter * pData->size);
|
|
||||||
*max_data = iov[0].iov_len - max_allowed;
|
|
||||||
iov[0].iov_len = *max_data;
|
|
||||||
}
|
|
||||||
pConv->bConverted += iov[0].iov_len;
|
|
||||||
*out_size = 1;
|
|
||||||
return (pConv->bConverted == length);
|
return (pConv->bConverted == length);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -716,12 +723,12 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
|
|||||||
*/
|
*/
|
||||||
int ompi_convertor_pack( ompi_convertor_t* pConv,
|
int ompi_convertor_pack( ompi_convertor_t* pConv,
|
||||||
struct iovec* iov,
|
struct iovec* iov,
|
||||||
unsigned int* out_size,
|
uint32_t* out_size,
|
||||||
unsigned int* max_data,
|
uint32_t* max_data,
|
||||||
int* freeAfter )
|
int* freeAfter )
|
||||||
{
|
{
|
||||||
dt_desc_t* pData = pConv->pDesc;
|
dt_desc_t* pData = pConv->pDesc;
|
||||||
u_int32_t done = 0, index = 0;
|
uint32_t done = 0, index = 0;
|
||||||
|
|
||||||
*freeAfter = 0; /* nothing to free yet */
|
*freeAfter = 0; /* nothing to free yet */
|
||||||
/* TODO should use the remote size */
|
/* TODO should use the remote size */
|
||||||
@ -762,7 +769,7 @@ int ompi_convertor_pack( ompi_convertor_t* pConv,
|
|||||||
|
|
||||||
extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
|
extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
|
||||||
int ompi_convertor_init_for_send( ompi_convertor_t* pConv,
|
int ompi_convertor_init_for_send( ompi_convertor_t* pConv,
|
||||||
unsigned int flags,
|
uint32_t flags,
|
||||||
dt_desc_t* dt,
|
dt_desc_t* dt,
|
||||||
int count,
|
int count,
|
||||||
void* pUserBuf,
|
void* pUserBuf,
|
||||||
@ -792,14 +799,11 @@ int ompi_convertor_init_for_send( ompi_convertor_t* pConv,
|
|||||||
pConv->memAlloc_fn = allocfn;
|
pConv->memAlloc_fn = allocfn;
|
||||||
if( dt->flags & DT_FLAG_CONTIGUOUS ) {
|
if( dt->flags & DT_FLAG_CONTIGUOUS ) {
|
||||||
pConv->flags |= DT_FLAG_CONTIGUOUS | CONVERTOR_HOMOGENEOUS;
|
pConv->flags |= DT_FLAG_CONTIGUOUS | CONVERTOR_HOMOGENEOUS;
|
||||||
pConv->fAdvance = ompi_convertor_pack_homogeneous_contig;
|
pConv->fAdvance = ompi_convertor_pack_no_conversion_contig;
|
||||||
} else {
|
} else {
|
||||||
/* TODO handle the sender convert case */
|
/* TODO handle the sender convert case */
|
||||||
pConv->fAdvance = ompi_convertor_pack_homogeneous_with_memcpy;
|
pConv->fAdvance = ompi_convertor_pack_no_conversion_contig;
|
||||||
pConv->fAdvance = ompi_convertor_pack_homogeneous;
|
pConv->fAdvance = ompi_convertor_pack_no_conversion;
|
||||||
#if defined(ONE_STEP)
|
|
||||||
pConv->fAdvance = ompi_convertor_pack_homogeneous_with_memcpy;
|
|
||||||
#endif /* ONE_STEP */
|
|
||||||
}
|
}
|
||||||
pConv->fAdvance = ompi_convertor_pack_general;
|
pConv->fAdvance = ompi_convertor_pack_general;
|
||||||
if( starting_pos != 0 ) {
|
if( starting_pos != 0 ) {
|
||||||
@ -863,7 +867,7 @@ ompi_convertor_t* ompi_convertor_get_copy( ompi_convertor_t* pConvertor )
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Currently we assume that only receiver-side conversion is possible */
|
/* Currently we assume that only receiver-side conversion is possible */
|
||||||
int ompi_convertor_get_packed_size( ompi_convertor_t* pConv, unsigned int* pSize )
|
int ompi_convertor_get_packed_size( ompi_convertor_t* pConv, uint32_t* pSize )
|
||||||
{
|
{
|
||||||
int ddt_size = 0;
|
int ddt_size = 0;
|
||||||
|
|
||||||
@ -874,7 +878,7 @@ int ompi_convertor_get_packed_size( ompi_convertor_t* pConv, unsigned int* pSize
|
|||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ompi_convertor_get_unpacked_size( ompi_convertor_t* pConv, unsigned int* pSize )
|
int ompi_convertor_get_unpacked_size( ompi_convertor_t* pConv, uint32_t* pSize )
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
dt_desc_t* pData = pConv->pDesc;
|
dt_desc_t* pData = pConv->pDesc;
|
||||||
|
@ -40,22 +40,22 @@ void ompi_ddt_dump_stack( dt_stack_t* pStack, int stack_pos, dt_elem_desc_t* pDe
|
|||||||
*/
|
*/
|
||||||
static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
|
static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
|
||||||
struct iovec* iov,
|
struct iovec* iov,
|
||||||
unsigned int* out_size,
|
uint32_t* out_size,
|
||||||
unsigned int* max_data,
|
uint32_t* max_data,
|
||||||
int* freeAfter )
|
int32_t* freeAfter )
|
||||||
{
|
{
|
||||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||||
unsigned int pos_desc; /* actual position in the description of the derived datatype */
|
uint32_t pos_desc; /* actual position in the description of the derived datatype */
|
||||||
int count_desc; /* the number of items already done in the actual pos_desc */
|
int count_desc; /* the number of items already done in the actual pos_desc */
|
||||||
int type; /* type at current position */
|
int type; /* type at current position */
|
||||||
unsigned int advance; /* number of bytes that we should advance the buffer */
|
uint32_t advance; /* number of bytes that we should advance the buffer */
|
||||||
long disp_desc = 0; /* compute displacement for truncated data */
|
long disp_desc = 0; /* compute displacement for truncated data */
|
||||||
int bConverted = 0; /* number of bytes converted this time */
|
int bConverted = 0; /* number of bytes converted this time */
|
||||||
dt_elem_desc_t* pElems;
|
dt_elem_desc_t* pElems;
|
||||||
int oCount = (pConvertor->pDesc->ub - pConvertor->pDesc->lb) * pConvertor->count;
|
int oCount = (pConvertor->pDesc->ub - pConvertor->pDesc->lb) * pConvertor->count;
|
||||||
char* pInput;
|
char* pInput;
|
||||||
int iCount, rc;
|
int iCount, rc;
|
||||||
unsigned int iov_count, total_bytes_converted = 0;
|
uint32_t iov_count, total_bytes_converted = 0;
|
||||||
|
|
||||||
/* For the general case always use the user data description */
|
/* For the general case always use the user data description */
|
||||||
pElems = pConvertor->pDesc->desc.desc;
|
pElems = pConvertor->pDesc->desc.desc;
|
||||||
@ -152,13 +152,13 @@ static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
|
|||||||
|
|
||||||
static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
|
static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
|
||||||
struct iovec* iov,
|
struct iovec* iov,
|
||||||
unsigned int* out_size,
|
uint32_t* out_size,
|
||||||
unsigned int* max_data,
|
uint32_t* max_data,
|
||||||
int* freeAfter )
|
int32_t* freeAfter )
|
||||||
{
|
{
|
||||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||||
unsigned int pos_desc; /* actual position in the description of the derived datatype */
|
uint32_t pos_desc; /* actual position in the description of the derived datatype */
|
||||||
unsigned int i; /* counter for basic datatype with extent */
|
uint32_t i; /* counter for basic datatype with extent */
|
||||||
int bConverted = 0; /* number of bytes converted this time */
|
int bConverted = 0; /* number of bytes converted this time */
|
||||||
long lastDisp = 0;
|
long lastDisp = 0;
|
||||||
size_t space = iov[0].iov_len, last_count = 0, last_blength = 0;
|
size_t space = iov[0].iov_len, last_count = 0, last_blength = 0;
|
||||||
@ -266,7 +266,7 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
|
|||||||
bConverted += last_count;
|
bConverted += last_count;
|
||||||
lastDisp += last_count;
|
lastDisp += last_count;
|
||||||
}
|
}
|
||||||
if( pos_desc < (unsigned int)pStack->end_loop ) { /* cleanup the stack */
|
if( pos_desc < (uint32_t)pStack->end_loop ) { /* cleanup the stack */
|
||||||
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_blength,
|
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_blength,
|
||||||
lastDisp, pos_desc );
|
lastDisp, pos_desc );
|
||||||
}
|
}
|
||||||
@ -279,16 +279,16 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
|
|||||||
|
|
||||||
static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
|
static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
|
||||||
struct iovec* iov,
|
struct iovec* iov,
|
||||||
unsigned int* out_size,
|
uint32_t* out_size,
|
||||||
unsigned int* max_data,
|
uint32_t* max_data,
|
||||||
int* freeAfter )
|
int32_t* freeAfter )
|
||||||
{
|
{
|
||||||
dt_desc_t *pData = pConv->pDesc;
|
dt_desc_t *pData = pConv->pDesc;
|
||||||
char* pDstBuf = pConv->pBaseBuf;
|
char* pDstBuf = pConv->pBaseBuf;
|
||||||
char* pSrcBuf = iov[0].iov_base;
|
char* pSrcBuf = iov[0].iov_base;
|
||||||
int bConverted = 0;
|
int bConverted = 0;
|
||||||
long extent = pData->ub - pData->lb;
|
long extent = pData->ub - pData->lb;
|
||||||
unsigned int length, remaining, i;
|
uint32_t length, remaining, i;
|
||||||
dt_stack_t* stack = &(pConv->pStack[1]);
|
dt_stack_t* stack = &(pConv->pStack[1]);
|
||||||
|
|
||||||
*out_size = 1;
|
*out_size = 1;
|
||||||
@ -344,12 +344,12 @@ static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
|
|||||||
|
|
||||||
int ompi_convertor_unpack( ompi_convertor_t* pConvertor,
|
int ompi_convertor_unpack( ompi_convertor_t* pConvertor,
|
||||||
struct iovec* iov,
|
struct iovec* iov,
|
||||||
unsigned int* out_size,
|
uint32_t* out_size,
|
||||||
unsigned int* max_data,
|
uint32_t* max_data,
|
||||||
int* freeAfter )
|
int32_t* freeAfter )
|
||||||
{
|
{
|
||||||
dt_desc_t *pData = pConvertor->pDesc;
|
dt_desc_t *pData = pConvertor->pDesc;
|
||||||
unsigned int length;
|
uint32_t length;
|
||||||
|
|
||||||
*freeAfter = 0;
|
*freeAfter = 0;
|
||||||
if( pConvertor->bConverted == (pData->size * pConvertor->count) ) {
|
if( pConvertor->bConverted == (pData->size * pConvertor->count) ) {
|
||||||
@ -383,13 +383,13 @@ int ompi_convertor_unpack( ompi_convertor_t* pConvertor,
|
|||||||
* basic datatype.
|
* basic datatype.
|
||||||
*/
|
*/
|
||||||
#define COPY_TYPE( TYPENAME, TYPE, COUNT ) \
|
#define COPY_TYPE( TYPENAME, TYPE, COUNT ) \
|
||||||
static int copy_##TYPENAME( unsigned int count, \
|
static int copy_##TYPENAME( uint32_t count, \
|
||||||
char* from, unsigned int from_len, long from_extent, \
|
char* from, uint32_t from_len, long from_extent, \
|
||||||
char* to, unsigned int to_len, long to_extent ) \
|
char* to, uint32_t to_len, long to_extent ) \
|
||||||
{ \
|
{ \
|
||||||
unsigned int i; \
|
uint32_t i; \
|
||||||
unsigned int remote_TYPE_size = sizeof(TYPE) * (COUNT); /* TODO */ \
|
uint32_t remote_TYPE_size = sizeof(TYPE) * (COUNT); /* TODO */ \
|
||||||
unsigned int local_TYPE_size = (COUNT) * sizeof(TYPE); \
|
uint32_t local_TYPE_size = (COUNT) * sizeof(TYPE); \
|
||||||
\
|
\
|
||||||
if( (remote_TYPE_size * count) > from_len ) { \
|
if( (remote_TYPE_size * count) > from_len ) { \
|
||||||
count = from_len / remote_TYPE_size; \
|
count = from_len / remote_TYPE_size; \
|
||||||
@ -416,22 +416,22 @@ static int copy_##TYPENAME( unsigned int count, \
|
|||||||
return count; \
|
return count; \
|
||||||
}
|
}
|
||||||
|
|
||||||
static int copy_bytes_1( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent );
|
static int copy_bytes_1( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
|
||||||
static int copy_bytes_2( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent );
|
static int copy_bytes_2( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
|
||||||
static int copy_bytes_4( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent );
|
static int copy_bytes_4( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
|
||||||
static int copy_bytes_8( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent );
|
static int copy_bytes_8( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
|
||||||
static int copy_bytes_12( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent );
|
static int copy_bytes_12( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
|
||||||
static int copy_bytes_16( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent );
|
static int copy_bytes_16( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
|
||||||
static int copy_bytes_20( unsigned int count, char* from, unsigned int from_len, long from_extent, char* to, unsigned int to_len, long to_extent );
|
static int copy_bytes_20( uint32_t count, char* from, uint32_t from_len, long from_extent, char* to, uint32_t to_len, long to_extent );
|
||||||
|
|
||||||
#define COPY_CONTIGUOUS_BYTES( TYPENAME, COUNT ) \
|
#define COPY_CONTIGUOUS_BYTES( TYPENAME, COUNT ) \
|
||||||
static int copy_##TYPENAME##_##COUNT( unsigned int count, \
|
static int copy_##TYPENAME##_##COUNT( uint32_t count, \
|
||||||
char* from, unsigned int from_len, long from_extent, \
|
char* from, uint32_t from_len, long from_extent, \
|
||||||
char* to, unsigned int to_len, long to_extent) \
|
char* to, uint32_t to_len, long to_extent) \
|
||||||
{ \
|
{ \
|
||||||
unsigned int i; \
|
uint32_t i; \
|
||||||
unsigned int remote_TYPE_size = (COUNT); /* TODO */ \
|
uint32_t remote_TYPE_size = (COUNT); /* TODO */ \
|
||||||
unsigned int local_TYPE_size = (COUNT); \
|
uint32_t local_TYPE_size = (COUNT); \
|
||||||
\
|
\
|
||||||
if( (remote_TYPE_size * count) > from_len ) { \
|
if( (remote_TYPE_size * count) > from_len ) { \
|
||||||
count = from_len / remote_TYPE_size; \
|
count = from_len / remote_TYPE_size; \
|
||||||
@ -601,7 +601,7 @@ int ompi_convertor_need_buffers( ompi_convertor_t* pConvertor )
|
|||||||
}
|
}
|
||||||
|
|
||||||
extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
|
extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
|
||||||
int ompi_convertor_init_for_recv( ompi_convertor_t* pConv, unsigned int flags,
|
int ompi_convertor_init_for_recv( ompi_convertor_t* pConv, uint32_t flags,
|
||||||
dt_desc_t* pData, int count,
|
dt_desc_t* pData, int count,
|
||||||
void* pUserBuf, int starting_point,
|
void* pUserBuf, int starting_point,
|
||||||
memalloc_fct_t allocfn )
|
memalloc_fct_t allocfn )
|
||||||
|
@ -19,7 +19,6 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
|
|||||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||||
int pos_desc; /* actual position in the description of the derived datatype */
|
int pos_desc; /* actual position in the description of the derived datatype */
|
||||||
int type, lastLength = 0;
|
int type, lastLength = 0;
|
||||||
long totalDisp;
|
|
||||||
ompi_datatype_t* pData = pConvertor->pDesc;
|
ompi_datatype_t* pData = pConvertor->pDesc;
|
||||||
int* remoteLength;
|
int* remoteLength;
|
||||||
int loop_length;
|
int loop_length;
|
||||||
@ -34,6 +33,12 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
|
|||||||
*/
|
*/
|
||||||
if( pConvertor->bConverted == (unsigned long)starting_point ) return OMPI_SUCCESS;
|
if( pConvertor->bConverted == (unsigned long)starting_point ) return OMPI_SUCCESS;
|
||||||
|
|
||||||
|
/* is the requested starting point at or beyond the end of the data? */
|
||||||
|
if( starting_point >= (int)(pConvertor->count * pData->size) ) {
|
||||||
|
pConvertor->bConverted = pConvertor->count * pData->size;
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
pConvertor->stack_pos = 0;
|
pConvertor->stack_pos = 0;
|
||||||
pStack = pConvertor->pStack;
|
pStack = pConvertor->pStack;
|
||||||
/* Fill the first position on the stack. This one corresponds to the
|
/* Fill the first position on the stack. This one corresponds to the
|
||||||
@ -81,24 +86,32 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
|
|||||||
pStack->disp = pElems[loop_length].disp;
|
pStack->disp = pElems[loop_length].disp;
|
||||||
|
|
||||||
pos_desc = 0;
|
pos_desc = 0;
|
||||||
remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] );
|
remoteLength = (int*)alloca( sizeof(int) * (pConvertor->pDesc->btypes[DT_LOOP] + 1));
|
||||||
remoteLength[0] = 0; /* initial value set to ZERO */
|
remoteLength[0] = 0; /* initial value set to ZERO */
|
||||||
|
|
||||||
|
/* The only way to get out of this loop is when we reach the desired position or
|
||||||
|
* when we finish the whole datatype.
|
||||||
|
*/
|
||||||
next_loop:
|
next_loop:
|
||||||
totalDisp = pStack->disp;
|
|
||||||
loop_length = remoteLength[pConvertor->stack_pos];
|
loop_length = remoteLength[pConvertor->stack_pos];
|
||||||
while( pos_desc >= 0 ) {
|
while( pos_desc >= 0 ) {
|
||||||
if( pElems->type == DT_END_LOOP ) { /* end of the current loop */
|
if( pElems->type == DT_END_LOOP ) { /* end of the current loop */
|
||||||
dt_endloop_desc_t* end_loop = (dt_endloop_desc_t*)pElems;
|
dt_endloop_desc_t* end_loop = (dt_endloop_desc_t*)pElems;
|
||||||
long extent;
|
long extent;
|
||||||
|
|
||||||
|
/* as we reach the end of the loop the count should be decreased by one */
|
||||||
|
pStack->count--;
|
||||||
|
|
||||||
/* now we know the length of the loop. We can compute
|
/* now we know the length of the loop. We can compute
|
||||||
* if the the starting_position will happend in one of the
|
* if the starting_position will happen in one of the
|
||||||
* iterations of this loop.
|
* iterations of this loop.
|
||||||
*/
|
*/
|
||||||
remoteLength[pConvertor->stack_pos] = loop_length;
|
remoteLength[pConvertor->stack_pos] = loop_length;
|
||||||
if( (loop_length * pStack->count) > resting_place ) {
|
if( (loop_length * pStack->count) > resting_place ) {
|
||||||
/* OK here we stop in this loop. First save the loop
|
/* We will stop somewhere on this loop. To avoid moving inside the loop
|
||||||
* on the stack, then save the position of the last data
|
* multiple times, we can compute the index of the loop where we will
|
||||||
|
* stop. Once this index is computed we can then reparse the loop once
|
||||||
|
* until we find the correct position.
|
||||||
*/
|
*/
|
||||||
int cnt = resting_place / loop_length;
|
int cnt = resting_place / loop_length;
|
||||||
if( pStack->index == -1 ) {
|
if( pStack->index == -1 ) {
|
||||||
@ -110,27 +123,32 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
|
|||||||
resting_place -= cnt * loop_length;
|
resting_place -= cnt * loop_length;
|
||||||
pStack->disp += cnt * extent;
|
pStack->disp += cnt * extent;
|
||||||
pos_desc -= end_loop->items; /* go back to the first element in the loop */
|
pos_desc -= end_loop->items; /* go back to the first element in the loop */
|
||||||
|
pElems = &(pData->desc.desc[pos_desc]);
|
||||||
goto next_loop;
|
goto next_loop;
|
||||||
}
|
}
|
||||||
/* Not in this loop. Cleanup the stack and advance to the
|
/* Not in this loop. Cleanup the stack and advance to the
|
||||||
* next data description.
|
* next data description.
|
||||||
*/
|
*/
|
||||||
pConvertor->stack_pos--;
|
loop_length *= pStack->count;
|
||||||
|
resting_place -= loop_length; /* update the resting place */
|
||||||
|
/* if we are embedded in another loop we should update its length too */
|
||||||
pStack--;
|
pStack--;
|
||||||
|
pConvertor->stack_pos--;
|
||||||
|
if( pConvertor->stack_pos > 0 ) {
|
||||||
|
remoteLength[pConvertor->stack_pos] += loop_length;
|
||||||
|
}
|
||||||
pos_desc++;
|
pos_desc++;
|
||||||
pElems++;
|
pElems++;
|
||||||
goto next_loop;
|
goto next_loop;
|
||||||
}
|
}
|
||||||
if( pElems->type == DT_LOOP ) {
|
if( pElems->type == DT_LOOP ) {
|
||||||
remoteLength[pConvertor->stack_pos + 1] = 0;
|
remoteLength[pConvertor->stack_pos + 1] = 0;
|
||||||
totalDisp = pElems->disp;
|
|
||||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
|
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
|
||||||
pData->desc.desc[pos_desc].count,
|
pData->desc.desc[pos_desc].count,
|
||||||
totalDisp, pos_desc + pElems->disp );
|
pStack->disp, pos_desc + pElems->disp );
|
||||||
pos_desc++;
|
pos_desc++;
|
||||||
pElems++;
|
pElems++;
|
||||||
loop_length = 0; /* starting a new loop */
|
loop_length = 0; /* starting a new loop */
|
||||||
goto next_loop;
|
|
||||||
}
|
}
|
||||||
while( pElems->flags & DT_FLAG_DATA ) {
|
while( pElems->flags & DT_FLAG_DATA ) {
|
||||||
/* now here we have a basic datatype */
|
/* now here we have a basic datatype */
|
||||||
@ -141,7 +159,7 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
|
|||||||
resting_place -= cnt * ompi_ddt_basicDatatypes[type]->size;
|
resting_place -= cnt * ompi_ddt_basicDatatypes[type]->size;
|
||||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
|
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
|
||||||
pElems->count - cnt,
|
pElems->count - cnt,
|
||||||
totalDisp + pElems->disp + cnt * pElems->extent,
|
pElems->disp + cnt * pElems->extent,
|
||||||
pos_desc );
|
pos_desc );
|
||||||
pConvertor->bConverted += (starting_point - resting_place);
|
pConvertor->bConverted += (starting_point - resting_place);
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -152,9 +170,8 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
|
|||||||
pElems++;
|
pElems++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
PUSH_STACK( pStack, pConvertor->stack_pos, 0, 0, 0, 0 );
|
|
||||||
|
|
||||||
/* Correctly update the bConverted field */
|
/* Correctly update the bConverted field */
|
||||||
pConvertor->bConverted = starting_point - resting_place;
|
pConvertor->bConverted = pData->size * pConvertor->count;
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user