This looks like a lot of changes, but the logic is exactly the same. I mostly cleaned up
the whole datatype code, made it a little more readable, and added some additional correctness checks. At the same time I moved some internal structures from the external .h include to the internal one. ddt_test.c gets one more datatype to test; it looks like the ones used in the BLACS test code. This commit was SVN r5498.
Этот коммит содержится в:
родитель
3c6fb03e9c
Коммит
dbac158804
@ -76,21 +76,12 @@ OMPI_DECLSPEC extern ompi_pointer_array_t *ompi_datatype_f_to_c_table;
|
||||
#define DT_FLAG_DATA_FORTRAN 0xC000
|
||||
#define DT_FLAG_DATA_LANGUAGE 0xC000
|
||||
|
||||
/* the basic element. A data description is composed
|
||||
* by a set of basic elements.
|
||||
*/
|
||||
typedef struct __dt_elem_desc {
|
||||
uint16_t flags; /**< flags for the record */
|
||||
uint16_t type; /**< the basic data type id */
|
||||
uint32_t count; /**< number of elements */
|
||||
long disp; /**< displacement of the first element */
|
||||
int32_t extent; /**< extent of each element */
|
||||
} dt_elem_desc_t;
|
||||
typedef union dt_elem_desc dt_elem_desc_t;
|
||||
|
||||
typedef struct __dt_struct_desc {
|
||||
uint32_t length; /* the maximum number of elements in the description array */
|
||||
uint32_t used; /* the number of used elements in the description array */
|
||||
dt_elem_desc_t* desc;
|
||||
uint32_t length; /* the maximum number of elements in the description array */
|
||||
uint32_t used; /* the number of used elements in the description array */
|
||||
dt_elem_desc_t* desc;
|
||||
} dt_type_desc_t;
|
||||
|
||||
/* the data description.
|
||||
@ -204,6 +195,7 @@ typedef struct __dt_stack {
|
||||
int32_t end_loop; /**< for loops the end of the loop, otherwise useless */
|
||||
long disp; /**< actual displacement depending on the count field */
|
||||
} dt_stack_t;
|
||||
|
||||
#define DT_STATIC_STACK_SIZE 5
|
||||
|
||||
struct ompi_convertor_t {
|
||||
@ -211,6 +203,7 @@ struct ompi_convertor_t {
|
||||
uint32_t remoteArch; /**< the remote architecture */
|
||||
uint32_t flags; /**< the properties of this convertor */
|
||||
ompi_datatype_t* pDesc; /**< the datatype description associated with the convertor */
|
||||
dt_type_desc_t* use_desc; /**< the datatype version used by the convertor (normal or optimized) */
|
||||
uint32_t count; /**< the total number of full datatype elements */
|
||||
char* pBaseBuf; /**< initial buffer as supplied by the user */
|
||||
dt_stack_t* pStack; /**< the local stack for the actual conversion */
|
||||
|
@ -110,24 +110,44 @@ static inline void DUMP( char* fmt, ... )
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* These 2 typedefs are the same as the dt_elem_desc_t except
|
||||
* for the name of the fields.
|
||||
*/
|
||||
typedef struct __dt_loop_desc {
|
||||
uint16_t flags; /**< flags for the record */
|
||||
uint16_t type; /**< the basic data type id */
|
||||
uint32_t loops; /**< number of times the loop have to be done */
|
||||
long items; /**< number of items in the loop */
|
||||
uint32_t extent; /**< extent of the whole loop */
|
||||
} dt_loop_desc_t;
|
||||
/**
 * Header placed at the start of every record of a datatype description.
 * The element, loop and end-loop records all begin with a member of this
 * type named `common`, so the record kind can be read through any of them.
 */
typedef struct ddt_elem_id_description {
    uint16_t flags;  /**< flags for the record */
    uint16_t type;   /**< the basic data type id */
} ddt_elem_id_description;
|
||||
|
||||
typedef struct __dt_endloop_desc {
|
||||
uint16_t flags; /**< flags for the record */
|
||||
uint16_t type; /**< the basic data type id */
|
||||
uint32_t items; /**< number of items in the loop */
|
||||
long total_extent; /**< total extent of the loop taking in account the repetitions */
|
||||
uint32_t size; /**< real size of the data in the loop */
|
||||
} dt_endloop_desc_t;
|
||||
/* the basic element. A data description is composed
|
||||
* by a set of basic elements.
|
||||
*/
|
||||
struct ddt_elem_desc {
|
||||
ddt_elem_id_description common; /**< basic data description and flags */
|
||||
uint32_t count; /**< number of elements */
|
||||
long disp; /**< displacement of the first element */
|
||||
int32_t extent; /**< extent of each element */
|
||||
};
|
||||
typedef struct ddt_elem_desc ddt_elem_desc_t;
|
||||
|
||||
struct ddt_loop_desc {
|
||||
ddt_elem_id_description common; /**< basic data description and flags */
|
||||
uint32_t loops; /**< number of elements */
|
||||
long items; /**< number of items in the loop */
|
||||
uint32_t extent; /**< extent of the whole loop */
|
||||
};
|
||||
typedef struct ddt_loop_desc ddt_loop_desc_t;
|
||||
|
||||
struct ddt_endloop_desc {
|
||||
ddt_elem_id_description common; /**< basic data description and flags */
|
||||
uint32_t items; /**< number of elements */
|
||||
long total_extent; /**< total extent of the loop taking in account the repetitions */
|
||||
uint32_t size; /**< real size of the data in the loop */
|
||||
};
|
||||
typedef struct ddt_endloop_desc ddt_endloop_desc_t;
|
||||
|
||||
union dt_elem_desc {
|
||||
ddt_elem_desc_t elem;
|
||||
ddt_loop_desc_t loop;
|
||||
ddt_endloop_desc_t end_loop;
|
||||
};
|
||||
|
||||
/* keep the last 16 bits free for data flags */
|
||||
#define CONVERTOR_USELESS 0x00010000
|
||||
@ -155,26 +175,7 @@ typedef struct {
|
||||
} ompi_complex_long_double_t;
|
||||
|
||||
extern const ompi_datatype_t* ompi_ddt_basicDatatypes[];
|
||||
|
||||
/* Macros to play with the flags.
 *
 * Each macro updates INT_VALUE in place and yields the updated value.
 * The whole expansion is parenthesised so a macro can be used inside a
 * larger expression without precedence surprises, and INT_VALUE is
 * evaluated only once.
 */
#define SWAP( INT_VALUE, FLAG )        ((INT_VALUE) ^= (FLAG))
#define SET_FLAG( INT_VALUE, FLAG )    ((INT_VALUE) |= (FLAG))
#define UNSET_FLAG( INT_VALUE, FLAG )  ((INT_VALUE) &= ~(FLAG))

/* Shortcuts for the DT_FLAG_CONTIGUOUS bit. */
#define SET_CONTIGUOUS_FLAG( INT_VALUE )   SET_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS)
#define UNSET_CONTIGUOUS_FLAG( INT_VALUE ) UNSET_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS)
|
||||
|
||||
/* Min/max helpers for long (LMAX/LMIN) and int (IMAX/IMIN) operands.
 *
 * When __GNUC__ is defined and __STDC__ is not, they are statement-expression
 * macros that copy each argument into a local first, so an argument is
 * evaluated only once. Otherwise they are static inline functions.
 * The last expression of a statement expression must end with ';'.
 */
#if defined(__GNUC__) && !defined(__STDC__)
#define LMAX(A,B)  ({ long _a = (A), _b = (B); (_a < _b ? _b : _a); })
#define LMIN(A,B)  ({ long _a = (A), _b = (B); (_a < _b ? _a : _b); })
#define IMAX(A,B)  ({ int _a = (A), _b = (B); (_a < _b ? _b : _a); })
#define IMIN(A,B)  ({ int _a = (A), _b = (B); (_a < _b ? _a : _b); })
#else
static inline long LMAX( long a, long b ) { return ( a < b ? b : a ); }
static inline long LMIN( long a, long b ) { return ( a < b ? a : b ); }
static inline int  IMAX( int a, int b ) { return ( a < b ? b : a ); }
static inline int  IMIN( int a, int b ) { return ( a < b ? a : b ); }
#endif  /* defined(__GNUC__) && !defined(__STDC__) */
|
||||
/* Look up the entry of ompi_ddt_basicDatatypes[] selected by the type id
 * stored in a description record. ELEM must be a dt_elem_desc_t object
 * (not a pointer): the macro reads (ELEM).elem.common.type and uses it as
 * the table index. */
#define BASIC_DDT_FROM_ELEM( ELEM ) (ompi_ddt_basicDatatypes[(ELEM).elem.common.type])
|
||||
|
||||
extern conversion_fct_t ompi_ddt_copy_functions[DT_MAX_PREDEFINED];
|
||||
extern int32_t ompi_ddt_external32_init( void );
|
||||
@ -270,7 +271,7 @@ static inline int GET_FIRST_NON_LOOP( const dt_elem_desc_t* _pElem )
|
||||
/* We dont have to check for the end as we always put an END_LOOP
|
||||
* at the end of all datatype descriptions.
|
||||
*/
|
||||
while( _pElem->type == DT_LOOP ) {
|
||||
while( _pElem->elem.common.type == DT_LOOP ) {
|
||||
++_pElem; index++;
|
||||
}
|
||||
return index;
|
||||
@ -359,16 +360,14 @@ int ompi_convertor_create_stack_at_begining( ompi_convertor_t* pConvertor, const
|
||||
* the entries on the stack ? Should I stop when I reach the first data element or
|
||||
* should I stop on the first contiguous loop ?
|
||||
*/
|
||||
while( pElems[index].type == DT_LOOP ) {
|
||||
dt_loop_desc_t* loop = (dt_loop_desc_t*)&(pElems[index]);
|
||||
|
||||
while( pElems[index].elem.common.type == DT_LOOP ) {
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, index,
|
||||
loop->loops, 0, loop->items );
|
||||
pElems[index].loop.loops, 0, pElems[index].loop.items );
|
||||
index++;
|
||||
}
|
||||
if( pElems[index].flags & DT_FLAG_DATA ) { /* let's stop here */
|
||||
if( pElems[index].elem.common.flags & DT_FLAG_DATA ) { /* let's stop here */
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, index,
|
||||
pElems[index].count, pElems[index].disp, 0 );
|
||||
pElems[index].elem.count, pElems[index].elem.disp, 0 );
|
||||
} else {
|
||||
ompi_output( 0, "Here we should have a data in the datatype description\n" );
|
||||
}
|
||||
|
@ -485,6 +485,74 @@ ompi_datatype_t* test_struct_char_double( void )
|
||||
return pdt;
|
||||
}
|
||||
|
||||
ompi_datatype_t* test_create_twice_two_doubles( void )
|
||||
{
|
||||
ompi_datatype_t* pdt;
|
||||
|
||||
ompi_ddt_create_vector( 2, 2, 5, &ompi_mpi_double, &pdt );
|
||||
ompi_ddt_commit( &pdt );
|
||||
ompi_ddt_dump( pdt );
|
||||
return pdt;
|
||||
}
|
||||
|
||||
/*
|
||||
Datatype 0x832cf28 size 0 align 1 id 0 length 4 used 0
|
||||
true_lb 0 true_ub 0 (true_extent 0) lb 0 ub 0 (extent 0)
|
||||
nbElems 0 loops 0 flags 6 (commited contiguous )-cC--------[---][---]
|
||||
contain 13 disp 0x420 (1056) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 13 disp 0x478 (1144) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 13 disp 0x4d0 (1232) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 13 disp 0x528 (1320) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 13 disp 0x580 (1408) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 13 disp 0x5d8 (1496) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 13 disp 0x630 (1584) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 12 disp 0x68c (1676) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 11 disp 0x6e8 (1768) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 10 disp 0x744 (1860) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 9 disp 0x7a0 (1952) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 8 disp 0x7fc (2044) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 7 disp 0x858 (2136) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 6 disp 0x8b4 (2228) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 5 disp 0x910 (2320) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 4 disp 0x96c (2412) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 3 disp 0x9c8 (2504) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 2 disp 0xa24 (2596) extent 4
|
||||
--C-----D*-[ C ][INT] MPI_INT count 1 disp 0xa80 (2688) extent 4
|
||||
*/
|
||||
static int blacs_length[] = { 13, 13, 13, 13, 13, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 };
|
||||
static int blacs_indices[] = { 1144/4, 1232/4, 1320/4, 1408/4, 1496/4, 1584/4, 1676/4, 1768/4,
|
||||
1860/4, 1952/4, 2044/4, 2136/4, 2228/4, 2320/4, 2412/4, 2504/4,
|
||||
2596/4, 2688/4 };
|
||||
ompi_datatype_t* test_create_blacs_type( void )
|
||||
{
|
||||
ompi_datatype_t *pdt;
|
||||
|
||||
ompi_ddt_create_indexed( 18, blacs_length, blacs_indices, &ompi_mpi_int, &pdt );
|
||||
ompi_ddt_commit( &pdt );
|
||||
ompi_ddt_dump( pdt );
|
||||
return pdt;
|
||||
}
|
||||
|
||||
/**
 * Create a vector datatype over base_type with the arguments (7, 1, 3),
 * commit it and dump its description.
 *
 * @param base_type  element datatype handed to ompi_ddt_create_vector
 * @return the committed datatype.
 */
ompi_datatype_t* test_create_blacs_type1( ompi_datatype_t* base_type )
{
    ompi_datatype_t* strided_type;

    ompi_ddt_create_vector( 7, 1, 3, base_type, &strided_type );
    ompi_ddt_commit( &strided_type );
    ompi_ddt_dump( strided_type );

    return strided_type;
}
|
||||
|
||||
/**
 * Create a vector datatype over base_type with the arguments (7, 1, 2),
 * commit it and dump its description.
 *
 * @param base_type  element datatype handed to ompi_ddt_create_vector
 * @return the committed datatype.
 */
ompi_datatype_t* test_create_blacs_type2( ompi_datatype_t* base_type )
{
    ompi_datatype_t* strided_type;

    ompi_ddt_create_vector( 7, 1, 2, base_type, &strided_type );
    ompi_ddt_commit( &strided_type );
    ompi_ddt_dump( strided_type );

    return strided_type;
}
|
||||
|
||||
ompi_datatype_t* test_struct( void )
|
||||
{
|
||||
ompi_datatype_t* types[] = { &ompi_mpi_float /* ompi_ddt_basicDatatypes[DT_FLOAT] */,
|
||||
@ -590,6 +658,110 @@ int local_copy_ddt_count( ompi_datatype_t* pdt, int count )
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Print a buffer as ints in hexadecimal, 7 rows of 2 columns, where the
 * value at (row, column) is word number column * 7 + row. Words that lie
 * beyond buffer_bytes are skipped instead of being read out of bounds. */
static void dump_2x7_int_view( const void* buffer, long buffer_bytes )
{
    const int* words = (const int*)buffer;
    const long available = buffer_bytes / (long)sizeof(int);
    int row, col;

    for( row = 0; row < 7; row++ ) {
        for( col = 0; col < 2; col++ ) {
            const long word = (long)col * 7 + row;
            if( word < available ) {
                printf( "%08x ", (unsigned int)words[word] );
            }
        }
        printf( "\n" );
    }
}

/**
 * Copy send_count items of send_type into recv_count items of recv_type by
 * packing with one convertor and unpacking with another, through a
 * temporary buffer of chunk bytes.
 *
 * The source buffer is filled with a byte pattern and the destination is
 * zeroed; the destination is dumped before and after the copy, and the
 * first ints of every packed chunk are dumped as well.
 *
 * @param send_type   datatype describing the source layout
 * @param send_count  number of send_type items
 * @param recv_type   datatype describing the destination layout
 * @param recv_count  number of recv_type items
 * @param chunk       size in bytes of the intermediate pack buffer
 * @return OMPI_SUCCESS in every case; failures are only reported on stdout.
 */
int local_copy_with_convertor_2datatypes( ompi_datatype_t* send_type, int send_count,
                                          ompi_datatype_t* recv_type, int recv_count,
                                          int chunk )
{
    long send_extent, recv_extent;
    long send_bytes, recv_bytes, temp_ints, i;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    ompi_convertor_t *pSendConvertor = NULL, *pRecvConvertor = NULL;
    struct iovec iov;
    uint32_t iov_count, max_data;
    int32_t free_after = 0, length = 0, done1 = 0, done2 = 0;

    ompi_ddt_type_extent( send_type, &send_extent );
    ompi_ddt_type_extent( recv_type, &recv_extent );
    send_bytes = send_extent * send_count;
    recv_bytes = recv_extent * recv_count;
    temp_ints  = chunk / (long)sizeof(int);

    pdst  = malloc( recv_bytes );
    psrc  = malloc( send_bytes );
    ptemp = malloc( chunk );
    if( (NULL == pdst) || (NULL == psrc) || (NULL == ptemp) ) {
        printf( "Unable to allocate the buffers for the copy\n" );
        goto clean_and_return;
    }

    /* recognizable pattern in the source, ZEROS in the receiver */
    for( i = 0; i < send_bytes; i++ ) {
        ((char*)psrc)[i] = (char)(i % 128 + 32);
    }
    memset( pdst, 0, recv_bytes );

    pSendConvertor = ompi_convertor_create( 0, 0 );
    if( OMPI_SUCCESS != ompi_convertor_init_for_send( pSendConvertor, 0, send_type, send_count, psrc, 0, NULL ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }
    pRecvConvertor = ompi_convertor_create( 0, 0 );
    if( OMPI_SUCCESS != ompi_convertor_init_for_recv( pRecvConvertor, 0, recv_type, recv_count, pdst, 0, NULL ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    /* Initial destination */
    dump_2x7_int_view( pdst, recv_bytes );

    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %d but the receive is %d\n", done1, done2 );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            done1 = ompi_convertor_pack( pSendConvertor, &iov, &iov_count, &max_data, &free_after );
            assert( free_after == 0 );
            if( 1 == done1 ) {
                printf( "pack finished\n" );
            }
            /* show at most the first 7 ints of the chunk, never past its end */
            for( i = 0; (i < 7) && (i < temp_ints); i++ ) {
                printf( "%x\n", (unsigned int)((int*)ptemp)[i] );
            }
        }

        if( done2 == 0 ) {
            done2 = ompi_convertor_unpack( pRecvConvertor, &iov, &iov_count, &max_data, &free_after );
            assert( free_after == 0 );
            if( 1 == done2 ) {
                printf( "unpack finished\n" );
            }
        }

        length += max_data;
    }

    /* final destination */
    dump_2x7_int_view( pdst, recv_bytes );

 clean_and_return:
    if( pSendConvertor != NULL ) {
        OBJ_RELEASE( pSendConvertor ); assert( pSendConvertor == NULL );
    }
    if( pRecvConvertor != NULL ) {
        OBJ_RELEASE( pRecvConvertor ); assert( pRecvConvertor == NULL );
    }
    /* free(NULL) is a no-op, so no guards are needed */
    free( pdst );
    free( psrc );
    free( ptemp );
    return OMPI_SUCCESS;
}
|
||||
|
||||
int local_copy_with_convertor( ompi_datatype_t* pdt, int count, int chunk )
|
||||
{
|
||||
long extent;
|
||||
@ -645,7 +817,6 @@ int local_copy_with_convertor( ompi_datatype_t* pdt, int count, int chunk )
|
||||
|
||||
length += max_data;
|
||||
}
|
||||
|
||||
clean_and_return:
|
||||
if( pSendConvertor != NULL ) {
|
||||
OBJ_RELEASE( pSendConvertor ); assert( pSendConvertor == NULL );
|
||||
@ -666,7 +837,7 @@ int main( int argc, char* argv[] )
|
||||
|
||||
ompi_ddt_init();
|
||||
|
||||
pdt = create_strange_dt();
|
||||
/* pdt = create_strange_dt();
|
||||
local_copy_ddt_count(pdt, 1);
|
||||
local_copy_with_convertor(pdt, 1, 4008);
|
||||
OBJ_RELEASE( pdt ); assert( pdt == NULL );
|
||||
@ -719,12 +890,26 @@ int main( int argc, char* argv[] )
|
||||
|
||||
OBJ_RELEASE( pdt1 ); assert( pdt1 == NULL );
|
||||
OBJ_RELEASE( pdt2 ); assert( pdt2 == NULL );
|
||||
OBJ_RELEASE( pdt3 ); /*assert( pdt3 == NULL );*/
|
||||
OBJ_RELEASE( pdt3 ); *//*assert( pdt3 == NULL );*/
|
||||
|
||||
pdt = test_struct_char_double();
|
||||
/*pdt = test_struct_char_double();
|
||||
local_copy_with_convertor( pdt, 4500, 12 );
|
||||
OBJ_RELEASE( pdt ); assert( pdt == NULL );*/
|
||||
|
||||
/*pdt = test_create_twice_two_doubles();
|
||||
local_copy_with_convertor( pdt, 4500, 12 );
|
||||
OBJ_RELEASE( pdt ); assert( pdt == NULL );
|
||||
|
||||
pdt = test_create_blacs_type();
|
||||
local_copy_with_convertor( pdt, 4500, 1023 );
|
||||
OBJ_RELEASE( pdt ); assert( pdt == NULL );*/
|
||||
|
||||
pdt1 = test_create_blacs_type1( &ompi_mpi_int );
|
||||
pdt2 = test_create_blacs_type2( &ompi_mpi_int );
|
||||
local_copy_with_convertor_2datatypes( pdt1, 1, pdt2, 1, 100 );
|
||||
OBJ_RELEASE( pdt1 ); assert( pdt1 == NULL );
|
||||
OBJ_RELEASE( pdt2 ); assert( pdt2 == NULL );
|
||||
|
||||
/* clean-ups all data allocations */
|
||||
ompi_ddt_finalize();
|
||||
|
||||
|
@ -19,6 +19,22 @@
|
||||
#include "datatype/datatype.h"
|
||||
#include "datatype/datatype_internal.h"
|
||||
|
||||
/* Macros to play with the flags.
 *
 * Each macro sets or clears the DT_FLAG_CONTIGUOUS bit of INT_VALUE in
 * place and yields the updated value. The whole expansion is parenthesised
 * so it can sit inside a larger expression, and INT_VALUE is evaluated
 * only once.
 */
#define SET_CONTIGUOUS_FLAG( INT_VALUE )   ((INT_VALUE) |= (DT_FLAG_CONTIGUOUS))
#define UNSET_CONTIGUOUS_FLAG( INT_VALUE ) ((INT_VALUE) &= ~(DT_FLAG_CONTIGUOUS))
|
||||
|
||||
/* Min/max helpers for long (LMAX/LMIN) and int (IMAX/IMIN) operands.
 *
 * When __GNUC__ is defined and __STDC__ is not, they are statement-expression
 * macros that copy each argument into a local first, so an argument is
 * evaluated only once. Otherwise they are static inline functions.
 * The last expression of a statement expression must end with ';'.
 */
#if defined(__GNUC__) && !defined(__STDC__)
#define LMAX(A,B)  ({ long _a = (A), _b = (B); (_a < _b ? _b : _a); })
#define LMIN(A,B)  ({ long _a = (A), _b = (B); (_a < _b ? _a : _b); })
#define IMAX(A,B)  ({ int _a = (A), _b = (B); (_a < _b ? _b : _a); })
#define IMIN(A,B)  ({ int _a = (A), _b = (B); (_a < _b ? _a : _b); })
#else
static inline long LMAX( long a, long b ) { return ( a < b ? b : a ); }
static inline long LMIN( long a, long b ) { return ( a < b ? a : b ); }
static inline int  IMAX( int a, int b ) { return ( a < b ? b : a ); }
static inline int  IMIN( int a, int b ) { return ( a < b ? a : b ); }
#endif  /* defined(__GNUC__) && !defined(__STDC__) */
|
||||
|
||||
/* When we add a datatype we should update it's definition depending on
|
||||
* the initial displacement for the whole data, so the displacement of
|
||||
* all elements inside a datatype depend only on the loop displacement
|
||||
@ -105,15 +121,15 @@ int32_t ompi_ddt_add( ompi_datatype_t* pdtBase, const ompi_datatype_t* pdtAdd,
|
||||
}
|
||||
pLast = &(pdtBase->desc.desc[pdtBase->desc.used]);
|
||||
if( (pdtAdd->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) { /* add a basic datatype */
|
||||
pLast->type = pdtAdd->id;
|
||||
pLast->count = count;
|
||||
pLast->disp = disp;
|
||||
pLast->extent = extent;
|
||||
pLast->elem.common.type = pdtAdd->id;
|
||||
pLast->elem.count = count;
|
||||
pLast->elem.disp = disp;
|
||||
pLast->elem.extent = extent;
|
||||
pdtBase->desc.used++;
|
||||
pdtBase->btypes[pdtAdd->id] += count;
|
||||
pLast->flags = pdtAdd->flags & ~(DT_FLAG_FOREVER | DT_FLAG_COMMITED | DT_FLAG_CONTIGUOUS);
|
||||
pLast->elem.common.flags = pdtAdd->flags & ~(DT_FLAG_FOREVER | DT_FLAG_COMMITED | DT_FLAG_CONTIGUOUS);
|
||||
if( extent == (int)pdtAdd->size )
|
||||
pLast->flags |= DT_FLAG_CONTIGUOUS;
|
||||
pLast->elem.common.flags |= DT_FLAG_CONTIGUOUS;
|
||||
} else {
|
||||
/* We handle a user defined datatype. We should make sure that the user will not have the
|
||||
* oportunity to destroy it before all datatypes derived are destroyed. As we keep pointers
|
||||
@ -136,11 +152,11 @@ int32_t ompi_ddt_add( ompi_datatype_t* pdtBase, const ompi_datatype_t* pdtAdd,
|
||||
*/
|
||||
if( count != 1 ) {
|
||||
pLoop = pLast;
|
||||
pLast->type = DT_LOOP;
|
||||
pLast->count = count;
|
||||
pLast->disp = (long)pdtAdd->desc.used + 1;
|
||||
pLast->extent = extent;
|
||||
pLast->flags = (pdtAdd->flags & ~(DT_FLAG_COMMITED | DT_FLAG_FOREVER));
|
||||
pLast->loop.common.type = DT_LOOP;
|
||||
pLast->loop.loops = count;
|
||||
pLast->loop.items = (long)pdtAdd->desc.used + 1;
|
||||
pLast->loop.extent = extent;
|
||||
pLast->loop.common.flags = (pdtAdd->flags & ~(DT_FLAG_COMMITED | DT_FLAG_FOREVER));
|
||||
localFlags = DT_FLAG_IN_LOOP;
|
||||
pdtBase->btypes[DT_LOOP] += 2;
|
||||
pdtBase->desc.used += 2;
|
||||
@ -148,23 +164,20 @@ int32_t ompi_ddt_add( ompi_datatype_t* pdtBase, const ompi_datatype_t* pdtAdd,
|
||||
}
|
||||
|
||||
for( i = 0; i < pdtAdd->desc.used; i++ ) {
|
||||
pLast->type = pdtAdd->desc.desc[i].type;
|
||||
pLast->flags = pdtAdd->desc.desc[i].flags | localFlags;
|
||||
pLast->count = pdtAdd->desc.desc[i].count;
|
||||
pLast->extent = pdtAdd->desc.desc[i].extent;
|
||||
pLast->disp = pdtAdd->desc.desc[i].disp;
|
||||
if( pdtAdd->desc.desc[i].type != DT_LOOP )
|
||||
pLast->disp += disp /* + pdtAdd->lb */;
|
||||
pLast->elem = pdtAdd->desc.desc[i].elem;
|
||||
pLast->elem.common.flags = pdtAdd->desc.desc[i].elem.common.flags | localFlags;
|
||||
if( DT_LOOP != pdtAdd->desc.desc[i].elem.common.type )
|
||||
pLast->elem.disp += disp /* + pdtAdd->lb */;
|
||||
pLast++;
|
||||
}
|
||||
pdtBase->desc.used += pdtAdd->desc.used;
|
||||
if( pLoop != NULL ) {
|
||||
pLast->type = DT_END_LOOP;
|
||||
pLast->count = pdtAdd->desc.used + 1; /* where the loop start */
|
||||
pLast->disp = disp + (count - 1) * extent
|
||||
+ (pdtAdd->true_ub - pdtAdd->true_lb) ; /* the final extent for the loop */
|
||||
pLast->extent = pdtAdd->size; /* the size of the data inside the loop */
|
||||
pLast->flags = pLoop->flags;
|
||||
pLast->end_loop.common.type = DT_END_LOOP;
|
||||
pLast->end_loop.items = pdtAdd->desc.used + 1; /* where the loop start */
|
||||
pLast->end_loop.total_extent = disp + (count - 1) * extent +
|
||||
(pdtAdd->true_ub - pdtAdd->true_lb) ; /* the final extent for the loop */
|
||||
pLast->end_loop.size = pdtAdd->size; /* the size of the data inside the loop */
|
||||
pLast->end_loop.common.flags = pLoop->loop.common.flags;
|
||||
}
|
||||
/* should I add some space until the extent of this datatype ? */
|
||||
}
|
||||
|
@ -287,17 +287,17 @@ int32_t ompi_ddt_init( void )
|
||||
ompi_datatype_t* datatype = (ompi_datatype_t*)ompi_ddt_basicDatatypes[i];
|
||||
|
||||
datatype->desc.desc = (dt_elem_desc_t*)malloc(2*sizeof(dt_elem_desc_t));
|
||||
datatype->desc.desc[0].flags = DT_FLAG_BASIC | DT_FLAG_CONTIGUOUS | DT_FLAG_DATA;
|
||||
datatype->desc.desc[0].type = i;
|
||||
datatype->desc.desc[0].count = 1;
|
||||
datatype->desc.desc[0].disp = 0;
|
||||
datatype->desc.desc[0].extent = datatype->size;
|
||||
datatype->desc.desc[0].elem.common.flags = DT_FLAG_BASIC | DT_FLAG_CONTIGUOUS | DT_FLAG_DATA;
|
||||
datatype->desc.desc[0].elem.common.type = i;
|
||||
datatype->desc.desc[0].elem.count = 1;
|
||||
datatype->desc.desc[0].elem.disp = 0;
|
||||
datatype->desc.desc[0].elem.extent = datatype->size;
|
||||
|
||||
datatype->desc.desc[1].flags = 0;
|
||||
datatype->desc.desc[1].type = DT_END_LOOP;
|
||||
datatype->desc.desc[1].count = 1;
|
||||
datatype->desc.desc[1].disp = datatype->ub - datatype->lb;
|
||||
datatype->desc.desc[1].extent = datatype->size;
|
||||
datatype->desc.desc[1].elem.common.flags = 0;
|
||||
datatype->desc.desc[1].elem.common.type = DT_END_LOOP;
|
||||
datatype->desc.desc[1].elem.count = 1;
|
||||
datatype->desc.desc[1].elem.disp = datatype->ub - datatype->lb;
|
||||
datatype->desc.desc[1].elem.extent = datatype->size;
|
||||
|
||||
datatype->desc.length = 1;
|
||||
datatype->desc.used = 1;
|
||||
@ -543,19 +543,20 @@ static int __dump_data_desc( dt_elem_desc_t* pDesc, int nbElems, char* ptr )
|
||||
int i, index = 0;
|
||||
|
||||
for( i = 0; i < nbElems; i++ ) {
|
||||
index += _dump_data_flags( pDesc->flags, ptr + index );
|
||||
if( pDesc->type == DT_LOOP )
|
||||
index += sprintf( ptr + index, "%15s %d times the next %d elements extent %d\n",
|
||||
ompi_ddt_basicDatatypes[pDesc->type]->name,
|
||||
(int)pDesc->count, (int)pDesc->disp, (int)pDesc->extent );
|
||||
else if( pDesc->type == DT_END_LOOP )
|
||||
index += sprintf( ptr + index, "%15s prev %d elements total true extent %d size of data %d\n",
|
||||
ompi_ddt_basicDatatypes[pDesc->type]->name,
|
||||
(int)pDesc->count, (int)pDesc->disp, (int)pDesc->extent );
|
||||
index += _dump_data_flags( pDesc->elem.common.flags, ptr + index );
|
||||
index += sprintf( ptr + index, "%15s ", ompi_ddt_basicDatatypes[pDesc->elem.common.type]->name );
|
||||
if( DT_LOOP == pDesc->elem.common.type )
|
||||
index += sprintf( ptr + index, "%d times the next %d elements extent %d\n",
|
||||
(int)pDesc->loop.loops, (int)pDesc->loop.items,
|
||||
(int)pDesc->loop.extent );
|
||||
else if( DT_END_LOOP == pDesc->elem.common.type )
|
||||
index += sprintf( ptr + index, "prev %d elements total true extent %d size of data %d\n",
|
||||
(int)pDesc->end_loop.items, (int)pDesc->end_loop.total_extent,
|
||||
(int)pDesc->end_loop.size );
|
||||
else
|
||||
index += sprintf( ptr + index, "%15s count %d disp 0x%lx (%ld) extent %d\n",
|
||||
ompi_ddt_basicDatatypes[pDesc->type]->name,
|
||||
(int)pDesc->count, pDesc->disp, pDesc->disp, (int)pDesc->extent );
|
||||
index += sprintf( ptr + index, "count %d disp 0x%lx (%ld) extent %d\n",
|
||||
(int)pDesc->elem.count, pDesc->elem.disp, pDesc->elem.disp,
|
||||
(int)pDesc->elem.extent );
|
||||
pDesc++;
|
||||
}
|
||||
return index;
|
||||
|
@ -26,22 +26,22 @@
|
||||
|
||||
#define SAVE_DESC( PELEM, DISP, COUNT, EXTENT ) \
|
||||
do { \
|
||||
(PELEM)->flags = DT_FLAG_BASIC | DT_FLAG_DATA; \
|
||||
(PELEM)->type = DT_BYTE; \
|
||||
(PELEM)->count = (COUNT); \
|
||||
(PELEM)->disp = (DISP); \
|
||||
(PELEM)->extent = (EXTENT); \
|
||||
(PELEM)->elem.common.flags = DT_FLAG_BASIC | DT_FLAG_DATA; \
|
||||
(PELEM)->elem.common.type = DT_BYTE; \
|
||||
(PELEM)->elem.count = (COUNT); \
|
||||
(PELEM)->elem.disp = (DISP); \
|
||||
(PELEM)->elem.extent = (EXTENT); \
|
||||
(PELEM)++; \
|
||||
nbElems++; \
|
||||
} while(0)
|
||||
|
||||
#define SAVE_ELEM( PELEM, TYPE, FLAGS, COUNT, DISP, EXTENT ) \
|
||||
do { \
|
||||
(PELEM)->flags = (FLAGS); \
|
||||
(PELEM)->type = (TYPE); \
|
||||
(PELEM)->count = (COUNT); \
|
||||
(PELEM)->disp = (DISP); \
|
||||
(PELEM)->extent = (EXTENT); \
|
||||
(PELEM)->elem.common.flags = (FLAGS); \
|
||||
(PELEM)->elem.common.type = (TYPE); \
|
||||
(PELEM)->elem.count = (COUNT); \
|
||||
(PELEM)->elem.disp = (DISP); \
|
||||
(PELEM)->elem.extent = (EXTENT); \
|
||||
(PELEM)++; \
|
||||
nbElems++; \
|
||||
} while(0)
|
||||
@ -72,33 +72,32 @@ int32_t ompi_ddt_optimize_short( ompi_datatype_t* pData, int32_t count,
|
||||
totalDisp = 0;
|
||||
|
||||
while( stack_pos >= 0 ) {
|
||||
if( pData->desc.desc[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
|
||||
dt_loop_desc_t* pStartLoop;
|
||||
if( DT_END_LOOP == pData->desc.desc[pos_desc].elem.common.type ) { /* end of the current loop */
|
||||
if( lastLength != 0 ) {
|
||||
SAVE_DESC( pElemDesc, lastDisp, lastLength, lastExtent );
|
||||
lastDisp += lastLength;
|
||||
lastLength = 0;
|
||||
}
|
||||
SAVE_ELEM( pElemDesc, DT_END_LOOP, pData->desc.desc[pos_desc].flags,
|
||||
SAVE_ELEM( pElemDesc, DT_END_LOOP, pData->desc.desc[pos_desc].elem.common.flags,
|
||||
nbElems - pStack->index + 1, /* # of elems in this loop */
|
||||
pData->desc.desc[pos_desc].disp,
|
||||
pData->desc.desc[pos_desc].extent );
|
||||
pData->desc.desc[pos_desc].elem.disp,
|
||||
pData->desc.desc[pos_desc].elem.extent );
|
||||
if( --stack_pos >= 0 ) { /* still something to do ? */
|
||||
pStartLoop = (dt_loop_desc_t*)&(pTypeDesc->desc[pStack->index - 1]);
|
||||
pStartLoop->items = (pElemDesc - 1)->count;
|
||||
ddt_loop_desc_t* pStartLoop = &(pTypeDesc->desc[pStack->index - 1].loop);
|
||||
pStartLoop->items = (pElemDesc - 1)->elem.count;
|
||||
totalDisp = pStack->disp; /* update the displacement position */
|
||||
}
|
||||
pStack--; /* go down one position on the stack */
|
||||
pos_desc++;
|
||||
continue;
|
||||
}
|
||||
if( pData->desc.desc[pos_desc].type == DT_LOOP ) {
|
||||
dt_loop_desc_t* loop = (dt_loop_desc_t*)&(pData->desc.desc[pos_desc]);
|
||||
dt_endloop_desc_t* end_loop = (dt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]);
|
||||
if( DT_LOOP == pData->desc.desc[pos_desc].elem.common.type ) {
|
||||
ddt_loop_desc_t* loop = (ddt_loop_desc_t*)&(pData->desc.desc[pos_desc]);
|
||||
ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]);
|
||||
int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) );
|
||||
long loop_disp = pData->desc.desc[pos_desc + index].disp;
|
||||
long loop_disp = pData->desc.desc[pos_desc + index].elem.disp;
|
||||
|
||||
if( loop->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
if( loop->common.flags & DT_FLAG_CONTIGUOUS ) {
|
||||
/* the loop is contiguous or composed by contiguous elements with a gap */
|
||||
if( loop->extent == end_loop->size ) {
|
||||
/* the whole loop is contiguous */
|
||||
@ -123,13 +122,13 @@ int32_t ompi_ddt_optimize_short( ompi_datatype_t* pData, int32_t count,
|
||||
/* we have a gap in the begining or the end of the loop but the whole
|
||||
* loop can be merged in just one memcpy.
|
||||
*/
|
||||
SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags,
|
||||
counter, (long)2, pData->desc.desc[pos_desc].extent );
|
||||
SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].elem.common.flags,
|
||||
counter, (long)2, pData->desc.desc[pos_desc].elem.extent );
|
||||
SAVE_DESC( pElemDesc, loop_disp, end_loop->size, lastExtent );
|
||||
SAVE_ELEM( pElemDesc, DT_END_LOOP, end_loop->flags,
|
||||
SAVE_ELEM( pElemDesc, DT_END_LOOP, end_loop->common.flags,
|
||||
2, end_loop->total_extent, end_loop->size );
|
||||
}
|
||||
pos_desc += pData->desc.desc[pos_desc].disp + 1;
|
||||
pos_desc += pData->desc.desc[pos_desc].loop.items + 1;
|
||||
changes++;
|
||||
} else {
|
||||
if( lastLength != 0 ) {
|
||||
@ -137,29 +136,29 @@ int32_t ompi_ddt_optimize_short( ompi_datatype_t* pData, int32_t count,
|
||||
lastDisp += lastLength;
|
||||
lastLength = 0;
|
||||
}
|
||||
SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags,
|
||||
pData->desc.desc[pos_desc].count, (long)nbElems,
|
||||
pData->desc.desc[pos_desc].extent );
|
||||
PUSH_STACK( pStack, stack_pos, nbElems, pData->desc.desc[pos_desc].count,
|
||||
totalDisp, pos_desc + pData->desc.desc[pos_desc].disp );
|
||||
SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].elem.common.flags,
|
||||
pData->desc.desc[pos_desc].elem.count, (long)nbElems,
|
||||
pData->desc.desc[pos_desc].elem.extent );
|
||||
PUSH_STACK( pStack, stack_pos, nbElems, pData->desc.desc[pos_desc].elem.count,
|
||||
totalDisp, pos_desc + pData->desc.desc[pos_desc].elem.disp );
|
||||
pos_desc++;
|
||||
DDT_DUMP_STACK( pStack, stack_pos, pData->desc.desc, "advance loops" );
|
||||
}
|
||||
totalDisp = pStack->disp; /* update the displacement */
|
||||
continue;
|
||||
}
|
||||
while( pData->desc.desc[pos_desc].flags & DT_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */
|
||||
while( pData->desc.desc[pos_desc].elem.common.flags & DT_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */
|
||||
/* now here we have a basic datatype */
|
||||
type = pData->desc.desc[pos_desc].type;
|
||||
if( (pData->desc.desc[pos_desc].flags & DT_FLAG_CONTIGUOUS) &&
|
||||
(lastDisp + lastLength) == (totalDisp + pData->desc.desc[pos_desc].disp) ) {
|
||||
lastLength += pData->desc.desc[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
|
||||
type = pData->desc.desc[pos_desc].elem.common.type;
|
||||
if( (pData->desc.desc[pos_desc].elem.common.flags & DT_FLAG_CONTIGUOUS) &&
|
||||
(lastDisp + lastLength) == (totalDisp + pData->desc.desc[pos_desc].elem.disp) ) {
|
||||
lastLength += pData->desc.desc[pos_desc].elem.count * ompi_ddt_basicDatatypes[type]->size;
|
||||
lastExtent = 1;
|
||||
} else {
|
||||
if( lastLength != 0 )
|
||||
SAVE_DESC( pElemDesc, lastDisp, lastLength, lastExtent );
|
||||
lastDisp = totalDisp + pData->desc.desc[pos_desc].disp;
|
||||
lastLength = pData->desc.desc[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
|
||||
lastDisp = totalDisp + pData->desc.desc[pos_desc].elem.disp;
|
||||
lastLength = pData->desc.desc[pos_desc].elem.count * ompi_ddt_basicDatatypes[type]->size;
|
||||
lastExtent = 1;
|
||||
}
|
||||
pos_desc++; /* advance to the next data */
|
||||
@ -183,122 +182,124 @@ int32_t ompi_ddt_optimize_short( ompi_datatype_t* pData, int32_t count,
|
||||
#if defined(COMPILE_USELSS_CODE)
|
||||
static int ompi_ddt_unroll( ompi_datatype_t* pData, int count )
|
||||
{
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int pos_desc; /* actual position in the description of the derived datatype */
|
||||
int type; /* type at current position */
|
||||
int i; /* index for basic elements with extent */
|
||||
int stack_pos = 0; /* position on the stack */
|
||||
long lastDisp = 0, lastLength = 0;
|
||||
char* pDestBuf;
|
||||
int bConverted = 0, __index = 0, __sofar = 0;
|
||||
dt_elem_desc_t* pElems;
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int pos_desc; /* actual position in the description of the derived datatype */
|
||||
int type; /* type at current position */
|
||||
int i; /* index for basic elements with extent */
|
||||
int stack_pos = 0; /* position on the stack */
|
||||
long lastDisp = 0, lastLength = 0;
|
||||
char* pDestBuf;
|
||||
int bConverted = 0, __index = 0, __sofar = 0;
|
||||
dt_elem_desc_t* pElems;
|
||||
|
||||
pDestBuf = NULL;
|
||||
pDestBuf = NULL;
|
||||
|
||||
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
long extent = pData->ub - pData->lb;
|
||||
char* pSrc = (char*)pData->true_lb;
|
||||
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
long extent = pData->ub - pData->lb;
|
||||
char* pSrc = (char*)pData->true_lb;
|
||||
|
||||
type = count * pData->size;
|
||||
if( pData->size == extent /* true extent at this point */ ) {
|
||||
/* we can do it with just one memcpy */
|
||||
PRINT_MEMCPY( pDestBuf, pSrc, pData->size * count );
|
||||
bConverted += (pData->size * count);
|
||||
} else {
|
||||
char* pSrcBuf = (char*)pData->true_lb;
|
||||
long extent = pData->ub - pData->lb;
|
||||
for( pos_desc = 0; pos_desc < count; pos_desc++ ) {
|
||||
PRINT_MEMCPY( pDestBuf, pSrcBuf, pData->size );
|
||||
pSrcBuf += extent;
|
||||
pDestBuf += pData->size;
|
||||
}
|
||||
bConverted += type;
|
||||
}
|
||||
return (bConverted == (pData->size * count));
|
||||
}
|
||||
pStack = alloca( sizeof(dt_stack_t) * pData->btypes[DT_LOOP] );
|
||||
pStack->count = count;
|
||||
pStack->index = -1;
|
||||
pStack->disp = 0;
|
||||
pos_desc = 0;
|
||||
type = count * pData->size;
|
||||
if( pData->size == extent /* true extent at this point */ ) {
|
||||
/* we can do it with just one memcpy */
|
||||
PRINT_MEMCPY( pDestBuf, pSrc, pData->size * count );
|
||||
bConverted += (pData->size * count);
|
||||
} else {
|
||||
char* pSrcBuf = (char*)pData->true_lb;
|
||||
long extent = pData->ub - pData->lb;
|
||||
for( pos_desc = 0; pos_desc < count; pos_desc++ ) {
|
||||
PRINT_MEMCPY( pDestBuf, pSrcBuf, pData->size );
|
||||
pSrcBuf += extent;
|
||||
pDestBuf += pData->size;
|
||||
}
|
||||
bConverted += type;
|
||||
}
|
||||
return (bConverted == (pData->size * count));
|
||||
}
|
||||
pStack = alloca( sizeof(dt_stack_t) * pData->btypes[DT_LOOP] );
|
||||
pStack->count = count;
|
||||
pStack->index = -1;
|
||||
pStack->disp = 0;
|
||||
pos_desc = 0;
|
||||
|
||||
if( pData->opt_desc.desc != NULL ) {
|
||||
pElems = pData->opt_desc.desc;
|
||||
pStack->end_loop = pData->opt_desc.used;
|
||||
} else {
|
||||
pElems = pData->desc.desc;
|
||||
pStack->end_loop = pData->desc.used;
|
||||
}
|
||||
if( pData->opt_desc.desc != NULL ) {
|
||||
pElems = pData->opt_desc.desc;
|
||||
pStack->end_loop = pData->opt_desc.used;
|
||||
} else {
|
||||
pElems = pData->desc.desc;
|
||||
pStack->end_loop = pData->desc.used;
|
||||
}
|
||||
|
||||
DDT_DUMP_STACK( pStack, stack_pos, pElems, "starting" );
|
||||
DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
|
||||
DUMP( "top stack info {index = %d, count = %d}\n", pStack->index, pStack->count );
|
||||
DDT_DUMP_STACK( pStack, stack_pos, pElems, "starting" );
|
||||
DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
|
||||
DUMP( "top stack info {index = %d, count = %d}\n", pStack->index, pStack->count );
|
||||
|
||||
while( pos_desc >= 0 ) {
|
||||
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
pStack--;
|
||||
if( --stack_pos == -1 ) break;
|
||||
} else {
|
||||
pos_desc = pStack->index;
|
||||
if( pos_desc == -1 )
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
else
|
||||
pStack->disp += pElems[pos_desc].extent;
|
||||
}
|
||||
pos_desc++;
|
||||
continue;
|
||||
}
|
||||
if( pElems[pos_desc].type == DT_LOOP ) {
|
||||
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
|
||||
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
|
||||
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength + pLast->extent );
|
||||
lastDisp = pStack->disp + pElems[pos_desc+1].disp + pLast->extent;
|
||||
i = 1;
|
||||
while( pos_desc >= 0 ) {
|
||||
if( DT_END_LOOP == pElems[pos_desc].type ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
pStack--;
|
||||
if( --stack_pos == -1 ) break;
|
||||
} else {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
lastDisp = pStack->disp + pElems[pos_desc + 1].disp;
|
||||
i = 0;
|
||||
pos_desc = pStack->index;
|
||||
if( pos_desc == -1 ) {
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
} else {
|
||||
assert( DT_LOOP == pElems[pos_desc].elem.common.type );
|
||||
pStack->disp += pElems[pos_desc].loop.extent;
|
||||
}
|
||||
}
|
||||
lastLength = pLast->extent;
|
||||
for( ; i < (pElems[pos_desc].count - 1); i++ ) {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
pDestBuf += pLast->extent;
|
||||
lastDisp += pElems[pos_desc].extent;
|
||||
pos_desc++;
|
||||
continue;
|
||||
}
|
||||
if( DT_LOOP == pElems[pos_desc].type ) {
|
||||
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
|
||||
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
|
||||
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength + pLast->extent );
|
||||
lastDisp = pStack->disp + pElems[pos_desc+1].disp + pLast->extent;
|
||||
i = 1;
|
||||
} else {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
lastDisp = pStack->disp + pElems[pos_desc + 1].disp;
|
||||
i = 0;
|
||||
}
|
||||
lastLength = pLast->extent;
|
||||
for( ; i < (pElems[pos_desc].count - 1); i++ ) {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
pDestBuf += pLast->extent;
|
||||
lastDisp += pElems[pos_desc].extent;
|
||||
}
|
||||
pos_desc += pElems[pos_desc].disp + 1;
|
||||
goto next_loop;
|
||||
} else {
|
||||
do {
|
||||
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].disp );
|
||||
pos_desc++;
|
||||
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
|
||||
}
|
||||
pos_desc += pElems[pos_desc].disp + 1;
|
||||
goto next_loop;
|
||||
} else {
|
||||
do {
|
||||
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].disp );
|
||||
pos_desc++;
|
||||
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
|
||||
}
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
type = pElems[pos_desc].type;
|
||||
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
|
||||
lastLength += pElems[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
|
||||
} else {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
pDestBuf += lastLength;
|
||||
bConverted += lastLength;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
lastLength = pElems[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
|
||||
}
|
||||
pos_desc++; /* advance to the next data */
|
||||
}
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
type = pElems[pos_desc].type;
|
||||
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
|
||||
lastLength += pElems[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
|
||||
} else {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
pDestBuf += lastLength;
|
||||
bConverted += lastLength;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
lastLength = pElems[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
|
||||
}
|
||||
pos_desc++; /* advance to the next data */
|
||||
}
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
#endif /* COMPILE_USELSS_CODE */
|
||||
|
||||
int32_t ompi_ddt_commit( ompi_datatype_t** data )
|
||||
{
|
||||
ompi_datatype_t* pData = *data;
|
||||
dt_endloop_desc_t* pLast = (dt_endloop_desc_t*)&(pData->desc.desc[pData->desc.used]);
|
||||
ddt_endloop_desc_t* pLast = &(pData->desc.desc[pData->desc.used].end_loop);
|
||||
|
||||
if( pData->flags & DT_FLAG_COMMITED ) return OMPI_SUCCESS;
|
||||
pData->flags |= DT_FLAG_COMMITED;
|
||||
@ -306,8 +307,8 @@ int32_t ompi_ddt_commit( ompi_datatype_t** data )
|
||||
/* let's add a fake element at the end just to avoid useless comparisons
|
||||
* in pack/unpack functions.
|
||||
*/
|
||||
pLast->type = DT_END_LOOP;
|
||||
pLast->flags = 0;
|
||||
pLast->common.type = DT_END_LOOP;
|
||||
pLast->common.flags = 0;
|
||||
pLast->items = pData->desc.used;
|
||||
pLast->total_extent = pData->ub - pData->lb;
|
||||
pLast->size = pData->size;
|
||||
@ -318,9 +319,9 @@ int32_t ompi_ddt_commit( ompi_datatype_t** data )
|
||||
/* let's add a fake element at the end just to avoid useless comparisons
|
||||
* in pack/unpack functions.
|
||||
*/
|
||||
pLast = (dt_endloop_desc_t*)&(pData->opt_desc.desc[pData->opt_desc.used]);
|
||||
pLast->type = DT_END_LOOP;
|
||||
pLast->flags = 0;
|
||||
pLast = &(pData->opt_desc.desc[pData->opt_desc.used].end_loop);
|
||||
pLast->common.type = DT_END_LOOP;
|
||||
pLast->common.flags = 0;
|
||||
pLast->items = pData->opt_desc.used;
|
||||
pLast->total_extent = pData->ub - pData->lb;
|
||||
pLast->size = pData->size;
|
||||
|
@ -79,7 +79,7 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
|
||||
pInput = iov[iov_count].iov_base;
|
||||
iCount = iov[iov_count].iov_len;
|
||||
while( 1 ) {
|
||||
if( pElem[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
|
||||
if( DT_END_LOOP == pElem[pos_desc].elem.common.type ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
if( pConvertor->stack_pos == 0 )
|
||||
goto complete_loop; /* completed */
|
||||
@ -91,47 +91,48 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
|
||||
if( pStack->index == -1 ) {
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
} else {
|
||||
pStack->disp += pElem[pStack->index].extent;
|
||||
assert( DT_LOOP == pElem[pStack->index].elem.common.type );
|
||||
pStack->disp += pElem[pStack->index].loop.extent;
|
||||
}
|
||||
}
|
||||
count_desc = pElem[pos_desc].count;
|
||||
disp_desc = pElem[pos_desc].disp;
|
||||
count_desc = pElem[pos_desc].elem.count;
|
||||
disp_desc = pElem[pos_desc].elem.disp;
|
||||
}
|
||||
if( pElem[pos_desc].type == DT_LOOP ) {
|
||||
if( DT_LOOP == pElem[pos_desc].elem.common.type ) {
|
||||
do {
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos,
|
||||
pos_desc, pElem[pos_desc].count,
|
||||
pStack->disp, pos_desc + pElem[pos_desc].disp + 1);
|
||||
pos_desc, pElem[pos_desc].elem.count,
|
||||
pStack->disp, pos_desc + pElem[pos_desc].elem.disp + 1);
|
||||
pos_desc++;
|
||||
} while( pElem[pos_desc].type == DT_LOOP ); /* let's start another loop */
|
||||
} while( DT_LOOP == pElem[pos_desc].elem.common.type ); /* let's start another loop */
|
||||
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loops" );
|
||||
/* update the current state */
|
||||
count_desc = pElem[pos_desc].count;
|
||||
disp_desc = pElem[pos_desc].disp;
|
||||
count_desc = pElem[pos_desc].elem.count;
|
||||
disp_desc = pElem[pos_desc].elem.disp;
|
||||
continue;
|
||||
}
|
||||
while( pElem[pos_desc].flags & DT_FLAG_DATA ) {
|
||||
while( pElem[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
|
||||
/* now here we have a basic datatype */
|
||||
type = pElem[pos_desc].type;
|
||||
type = pElem[pos_desc].elem.common.type;
|
||||
rc = pConvertor->pFunctions[type]( count_desc,
|
||||
pOutput + pStack->disp + disp_desc,
|
||||
iCount, pElem[pos_desc].extent,
|
||||
pInput, iCount, ompi_ddt_basicDatatypes[type]->size );
|
||||
advance = rc * ompi_ddt_basicDatatypes[type]->size;
|
||||
iCount, pElem[pos_desc].elem.extent,
|
||||
pInput, iCount, BASIC_DDT_FROM_ELEM(pElem[pos_desc])->size );
|
||||
advance = rc * BASIC_DDT_FROM_ELEM(pElem[pos_desc])->size;
|
||||
iCount -= advance; /* decrease the available space in the buffer */
|
||||
pInput += advance; /* increase the pointer to the buffer */
|
||||
bConverted += advance;
|
||||
if( rc != count_desc ) {
|
||||
/* not all data has been converted. Keep the state */
|
||||
count_desc -= rc;
|
||||
disp_desc += rc * pElem[pos_desc].extent;
|
||||
disp_desc += rc * pElem[pos_desc].elem.extent;
|
||||
if( iCount != 0 )
|
||||
printf( "pack there is still room in the input buffer %d bytes\n", iCount );
|
||||
goto complete_loop;
|
||||
}
|
||||
pos_desc++; /* advance to the next data */
|
||||
count_desc = pElem[pos_desc].count;
|
||||
disp_desc = pElem[pos_desc].disp;
|
||||
count_desc = pElem[pos_desc].elem.count;
|
||||
disp_desc = pElem[pos_desc].elem.disp;
|
||||
if( iCount == 0 ) goto complete_loop; /* break if there is no more data in the buffer */
|
||||
}
|
||||
}
|
||||
@ -188,7 +189,7 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
|
||||
pConv->stack_pos--;
|
||||
|
||||
while( 1 ) {
|
||||
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
|
||||
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
if( pConv->stack_pos == 0 ) { /* finish everything */
|
||||
last_count = 0;
|
||||
@ -203,35 +204,36 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
pos_desc = 0;
|
||||
} else {
|
||||
pStack->disp += pElems[pos_desc].extent;
|
||||
assert( DT_LOOP == pElems[pStack->index].elem.common.type );
|
||||
pStack->disp += pElems[pStack->index].loop.extent;
|
||||
pos_desc = pStack->index + 1;
|
||||
}
|
||||
}
|
||||
last_count = pElems[pos_desc].count;
|
||||
last_count = pElems[pos_desc].elem.count;
|
||||
last_blength = last_count;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
|
||||
continue;
|
||||
}
|
||||
while( pElems[pos_desc].type == DT_LOOP ) {
|
||||
while( DT_LOOP == pElems[pos_desc].elem.common.type ) {
|
||||
int stop_in_loop = 0;
|
||||
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
|
||||
dt_elem_desc_t* pLast = &(pElems[pos_desc + pElems[pos_desc].disp]);
|
||||
if( (pLast->extent * last_count) > (int)space ) {
|
||||
if( pElems[pos_desc].elem.common.flags & DT_FLAG_CONTIGUOUS ) {
|
||||
ddt_endloop_desc_t* end_loop = &(pElems[pos_desc + pElems[pos_desc].loop.items].end_loop);
|
||||
if( (end_loop->size * last_count) > space ) {
|
||||
stop_in_loop = last_count;
|
||||
last_count = space / pLast->extent;
|
||||
last_count = space / end_loop->size;
|
||||
}
|
||||
for( i = 0; i < last_count; i++ ) {
|
||||
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, pLast->extent,
|
||||
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, end_loop->size,
|
||||
pConv->pBaseBuf, pData, pConv->count );
|
||||
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, pLast->extent );
|
||||
pDestBuf += pLast->extent; /* size of the contiguous data */
|
||||
lastDisp += pElems[pos_desc].extent;
|
||||
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, end_loop->size );
|
||||
pDestBuf += end_loop->size; /* size of the contiguous data */
|
||||
lastDisp += pElems[pos_desc].loop.extent;
|
||||
}
|
||||
space -= (pLast->extent * last_count);
|
||||
bConverted += (pLast->extent * last_count);
|
||||
space -= (end_loop->size * last_count);
|
||||
bConverted += (end_loop->size * last_count);
|
||||
if( stop_in_loop == 0 ) {
|
||||
pos_desc += pElems[pos_desc].disp + 1;
|
||||
last_count = pElems[pos_desc].count;
|
||||
pos_desc += pElems[pos_desc].loop.items + 1;
|
||||
last_count = pElems[pos_desc].elem.count;
|
||||
continue;
|
||||
}
|
||||
/* mark some of the iterations as completed */
|
||||
@ -240,18 +242,18 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
|
||||
/* Save the stack with the correct last_count value. */
|
||||
}
|
||||
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_count,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].disp );
|
||||
pStack->disp, pos_desc + pElems[pos_desc].loop.items );
|
||||
pos_desc++;
|
||||
last_count = pElems[pos_desc].count;
|
||||
last_count = pElems[pos_desc].elem.count;
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
|
||||
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
|
||||
/* do we have enough space in the buffer ? */
|
||||
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
|
||||
last_blength = last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
|
||||
if( space < last_blength ) {
|
||||
last_blength = last_count;
|
||||
last_count = space / ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
|
||||
space -= (last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size);
|
||||
last_count = space / BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
|
||||
space -= (last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size);
|
||||
last_blength -= last_count;
|
||||
goto end_loop; /* or break whatever but go out of this while */
|
||||
}
|
||||
@ -262,8 +264,8 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
|
||||
space -= last_blength;
|
||||
pDestBuf += last_blength;
|
||||
pos_desc++; /* advance to the next data */
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
last_count = pElems[pos_desc].count;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
|
||||
last_count = pElems[pos_desc].elem.count;
|
||||
}
|
||||
}
|
||||
last_count = 0; /* complete the data */
|
||||
@ -323,7 +325,7 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
|
||||
/* retrieve the context of the last call */
|
||||
pos_desc = pStack->index;
|
||||
last_count = pStack->count;
|
||||
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
|
||||
last_blength = last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
|
||||
lastDisp = pStack->disp;
|
||||
savePos = (char*)pConv->pBaseBuf + pStack->disp;
|
||||
saveLength = 0;
|
||||
@ -334,7 +336,7 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
|
||||
space_on_iovec = iov[0].iov_len;
|
||||
|
||||
while( pos_desc >= 0 ) {
|
||||
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
|
||||
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
if( pConv->stack_pos == 0 ) { /* finish everything */
|
||||
if( saveLength != 0 ) {
|
||||
@ -380,26 +382,28 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
|
||||
pStack--;
|
||||
} else {
|
||||
pos_desc = pStack->index; /* DT_LOOP index */
|
||||
if( pos_desc == -1 )
|
||||
if( pos_desc == -1 ) {
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
else
|
||||
pStack->disp += pElems[pos_desc].extent;
|
||||
} else {
|
||||
assert( DT_LOOP == pElems[pos_desc].elem.common.type );
|
||||
pStack->disp += pElems[pos_desc].loop.extent;
|
||||
}
|
||||
}
|
||||
pos_desc++; /* go to the next element */
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
last_count = pElems[pos_desc].count;
|
||||
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
|
||||
last_count = pElems[pos_desc].elem.count;
|
||||
last_blength = last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
|
||||
continue; /* next loop */
|
||||
}
|
||||
while( pElems[pos_desc].type == DT_LOOP ) {
|
||||
while( DT_LOOP == pElems[pos_desc].elem.common.type ) {
|
||||
int stop_in_loop = 0;
|
||||
|
||||
/* If the loop container is contiguous then we can do some
|
||||
* optimizations.
|
||||
*/
|
||||
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
|
||||
if( pElems[pos_desc].elem.common.flags & DT_FLAG_CONTIGUOUS ) {
|
||||
/* point to the end of loop element */
|
||||
dt_elem_desc_t* pLast = &(pElems[pos_desc + pElems[pos_desc].disp]);
|
||||
ddt_endloop_desc_t* end_loop = &(pElems[pos_desc + pElems[pos_desc].loop.items].end_loop);
|
||||
if( iov[iov_pos].iov_base == NULL ) {
|
||||
iov[iov_pos].iov_base = pConv->memAlloc_fn( &(iov[iov_pos].iov_len) );
|
||||
space_on_iovec = iov[iov_pos].iov_len;
|
||||
@ -407,32 +411,32 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
|
||||
(*freeAfter) |= (1 << iov_pos);
|
||||
}
|
||||
/* compute the maximum amount of data to be packed */
|
||||
if( (pLast->extent * last_count) > (int)space_on_iovec ) {
|
||||
if( (end_loop->size * last_count) > space_on_iovec ) {
|
||||
stop_in_loop = last_count;
|
||||
last_count = space_on_iovec / pLast->extent;
|
||||
last_count = space_on_iovec / end_loop->size;
|
||||
}
|
||||
/* Now let's do it */
|
||||
for( i = 0; i < last_count; i++ ) {
|
||||
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, pLast->extent,
|
||||
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, end_loop->size,
|
||||
pConv->pBaseBuf, pData, pConv->count );
|
||||
DO_DEBUG (ompi_output( 0, "2. memcpy( %p, %p, %ld )\n", pDestBuf, pConv->pBaseBuf + lastDisp,
|
||||
pLast->extent ); );
|
||||
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, pLast->extent );
|
||||
lastDisp += pElems[pos_desc].extent;
|
||||
pDestBuf += pLast->extent;
|
||||
end_loop->size ); );
|
||||
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, end_loop->size );
|
||||
lastDisp += pElems[pos_desc].loop.extent;
|
||||
pDestBuf += end_loop->size;
|
||||
}
|
||||
DO_DEBUG( ompi_output( 0, "\t\tbConverted %ld space %ld pConv->bConverted %ld\n",
|
||||
bConverted, space_on_iovec, pConv->bConverted ); );
|
||||
i = pLast->extent * last_count; /* temporary value */
|
||||
i = end_loop->size * last_count; /* temporary value */
|
||||
space_on_iovec -= i;
|
||||
space -= i;
|
||||
bConverted += i;
|
||||
if( stop_in_loop == 0 ) { /* did I stop before the end */
|
||||
/* the pElems point to the LOOP struct */
|
||||
pos_desc += pElems[pos_desc].disp + 1;
|
||||
last_count = pElems[pos_desc].count;
|
||||
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
/* the pElems point to the LOOP struct in the beginning */
|
||||
pos_desc += pElems[pos_desc].loop.items + 1;
|
||||
last_count = pElems[pos_desc].elem.count;
|
||||
last_blength = last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
|
||||
continue;
|
||||
}
|
||||
/* mark some of the iterations as completed */
|
||||
@ -441,14 +445,14 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
|
||||
/* Save the stack with the correct last_count value. */
|
||||
}
|
||||
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_count,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].disp );
|
||||
pStack->disp, pos_desc + pElems[pos_desc].loop.items );
|
||||
pos_desc++;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
last_count = pElems[pos_desc].count;
|
||||
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
|
||||
last_count = pElems[pos_desc].elem.count;
|
||||
last_blength = last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
|
||||
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
|
||||
/* first let's see if it's contiguous with the previous chunk of memory and
|
||||
* we still have enough room in the buffer...
|
||||
*/
|
||||
@ -539,9 +543,9 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
|
||||
if( saveLength > space ) /* this will be the last element copied this time */
|
||||
continue;
|
||||
pos_desc++; /* advance to the next data */
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
last_count = pElems[pos_desc].count;
|
||||
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
|
||||
last_count = pElems[pos_desc].elem.count;
|
||||
last_blength = last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
|
||||
}
|
||||
}
|
||||
end_loop:
|
||||
@ -580,13 +584,12 @@ ompi_convertor_pack_no_conv_contig( ompi_convertor_t* pConv,
|
||||
*/
|
||||
pSrc = pConv->pBaseBuf + pStack[0].disp + pStack[1].disp;
|
||||
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
|
||||
if( iov[iov_count].iov_len > length )
|
||||
iov[iov_count].iov_len = length;
|
||||
if( iov[iov_count].iov_base == NULL ) {
|
||||
iov[iov_count].iov_base = pSrc;
|
||||
if( iov[iov_count].iov_len > length )
|
||||
iov[iov_count].iov_len = length;
|
||||
} else {
|
||||
/* contiguous data just memcpy the smallest data in the user buffer */
|
||||
iov[iov_count].iov_len = IMIN( iov[iov_count].iov_len, length );
|
||||
OMPI_DDT_SAFEGUARD_POINTER( pSrc, iov[iov_count].iov_len,
|
||||
pConv->pBaseBuf, pData, pConv->count );
|
||||
MEMCPY( iov[iov_count].iov_base, pSrc, iov[iov_count].iov_len);
|
||||
@ -691,13 +694,12 @@ ompi_convertor_pack_no_conv_contig_with_gaps( ompi_convertor_t* pConv,
|
||||
}
|
||||
|
||||
if( (long)pData->size == extent ) { /* that really contiguous */
|
||||
/* contiguous data just memcpy the smallest data in the user buffer */
|
||||
if( (pConv->bConverted + iov[iov_count].iov_len) > length )
|
||||
iov[iov_count].iov_len = length - pConv->bConverted;
|
||||
if( iov[iov_count].iov_base == NULL ) {
|
||||
iov[iov_count].iov_base = pSrc;
|
||||
if( (pConv->bConverted + iov[iov_count].iov_len) > length )
|
||||
iov[iov_count].iov_len = length - pConv->bConverted;
|
||||
} else {
|
||||
/* contiguous data just memcpy the smallest data in the user buffer */
|
||||
iov[iov_count].iov_len = IMIN( iov[iov_count].iov_len, length );
|
||||
OMPI_DDT_SAFEGUARD_POINTER( pSrc, iov[iov_count].iov_len,
|
||||
pConv->pBaseBuf, pData, pConv->count );
|
||||
MEMCPY( iov[iov_count].iov_base, pSrc, iov[iov_count].iov_len);
|
||||
|
@ -32,9 +32,9 @@ void ompi_ddt_dump_stack( const dt_stack_t* pStack, int stack_pos, const dt_elem
|
||||
pStack->count, pStack->disp, pStack->end_loop );
|
||||
if( pStack->index != -1 )
|
||||
printf( "[desc count %d disp %ld extent %d]\n",
|
||||
pDesc[pStack->index].count,
|
||||
pDesc[pStack->index].disp,
|
||||
pDesc[pStack->index].extent );
|
||||
pDesc[pStack->index].elem.count,
|
||||
pDesc[pStack->index].elem.disp,
|
||||
pDesc[pStack->index].elem.extent );
|
||||
else
|
||||
printf( "\n" );
|
||||
}
|
||||
@ -91,7 +91,7 @@ static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
|
||||
pInput = iov[iov_count].iov_base;
|
||||
iCount = iov[iov_count].iov_len;
|
||||
while( 1 ) {
|
||||
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
|
||||
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
if( pConvertor->stack_pos == 0 )
|
||||
goto save_and_return; /* completed */
|
||||
@ -99,33 +99,35 @@ static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
|
||||
pStack--;
|
||||
}
|
||||
|
||||
if( pStack->index == -1 )
|
||||
if( pStack->index == -1 ) {
|
||||
pStack->disp += (pConvertor->pDesc->ub - pConvertor->pDesc->lb);
|
||||
else
|
||||
pStack->disp += pElems[pStack->index].extent;
|
||||
} else {
|
||||
assert( DT_LOOP == pElems[pStack->index].elem.common.type );
|
||||
pStack->disp += pElems[pStack->index].loop.extent;
|
||||
}
|
||||
pos_desc = pStack->index + 1;
|
||||
count_desc = pElems[pos_desc].count;
|
||||
disp_desc = pElems[pos_desc].disp;
|
||||
count_desc = pElems[pos_desc].elem.count;
|
||||
disp_desc = pElems[pos_desc].elem.disp;
|
||||
}
|
||||
if( pElems[pos_desc].type == DT_LOOP ) {
|
||||
if( DT_LOOP == pElems[pos_desc].elem.common.type ) {
|
||||
do {
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos,
|
||||
pos_desc, pElems[pos_desc].count,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].disp + 1 );
|
||||
pos_desc, pElems[pos_desc].loop.loops,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].loop.items + 1 );
|
||||
pos_desc++;
|
||||
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
|
||||
} while( DT_LOOP == pElems[pos_desc].loop.common.type ); /* let's start another loop */
|
||||
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "advance loops" );
|
||||
/* update the current state */
|
||||
count_desc = pElems[pos_desc].count;
|
||||
disp_desc = pElems[pos_desc].disp;
|
||||
count_desc = pElems[pos_desc].elem.count;
|
||||
disp_desc = pElems[pos_desc].elem.disp;
|
||||
}
|
||||
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
|
||||
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
|
||||
/* now here we have a basic datatype */
|
||||
type = pElems[pos_desc].type;
|
||||
type = pElems[pos_desc].elem.common.type;
|
||||
rc = pConvertor->pFunctions[type]( count_desc,
|
||||
pInput, iCount, ompi_ddt_basicDatatypes[type]->size,
|
||||
pConvertor->pBaseBuf + pStack->disp + disp_desc,
|
||||
oCount, pElems[pos_desc].extent );
|
||||
oCount, pElems[pos_desc].elem.extent );
|
||||
advance = rc * ompi_ddt_basicDatatypes[type]->size;
|
||||
iCount -= advance; /* decrease the available space in the buffer */
|
||||
pInput += advance; /* increase the pointer to the buffer */
|
||||
@ -133,14 +135,14 @@ static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
|
||||
if( rc != count_desc ) {
|
||||
/* not all data has been converted. Keep the state */
|
||||
count_desc -= rc;
|
||||
disp_desc += rc * pElems[pos_desc].extent;
|
||||
disp_desc += rc * pElems[pos_desc].elem.extent;
|
||||
if( iCount != 0 )
|
||||
printf( "unpack there is still room in the input buffer %d bytes\n", iCount );
|
||||
goto save_and_return;
|
||||
}
|
||||
pos_desc++; /* advance to the next data */
|
||||
count_desc = pElems[pos_desc].count;
|
||||
disp_desc = pElems[pos_desc].disp;
|
||||
count_desc = pElems[pos_desc].elem.count;
|
||||
disp_desc = pElems[pos_desc].elem.disp;
|
||||
if( iCount == 0 )
|
||||
goto save_and_return; /* break if there is no more data in the buffer */
|
||||
}
|
||||
@ -194,7 +196,7 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
|
||||
pConv->stack_pos--;
|
||||
|
||||
while( 1 ) { /* loop forever. The exit condition is detected inside the while loop */
|
||||
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
|
||||
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
if( pConv->stack_pos == 0 ) {
|
||||
last_blength = 0; /* nothing to copy anymore */
|
||||
@ -204,37 +206,39 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
|
||||
pConv->stack_pos--;
|
||||
pos_desc++;
|
||||
} else {
|
||||
if( pStack->index == -1 )
|
||||
if( pStack->index == -1 ) {
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
else
|
||||
pStack->disp += pElems[pStack->index].extent;
|
||||
} else {
|
||||
assert( DT_LOOP == pElems[pStack->index].elem.common.type );
|
||||
pStack->disp += pElems[pStack->index].loop.extent;
|
||||
}
|
||||
pos_desc = pStack->index + 1;
|
||||
}
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
last_count = pElems[pos_desc].count;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
|
||||
last_count = pElems[pos_desc].elem.count;
|
||||
continue;
|
||||
}
|
||||
while( pElems[pos_desc].type == DT_LOOP ) {
|
||||
while( DT_LOOP == pElems[pos_desc].elem.common.type ) {
|
||||
int stop_in_loop = 0;
|
||||
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
|
||||
dt_elem_desc_t* pLast = &(pElems[pos_desc + pElems[pos_desc].disp]);
|
||||
last_count = pElems[pos_desc].count;
|
||||
if( (pLast->extent * last_count) > space ) {
|
||||
if( pElems[pos_desc].loop.common.flags & DT_FLAG_CONTIGUOUS ) {
|
||||
ddt_endloop_desc_t* end_loop = &(pElems[pos_desc + pElems[pos_desc].loop.items].end_loop);
|
||||
last_count = pElems[pos_desc].loop.loops;
|
||||
if( (end_loop->size * last_count) > space ) {
|
||||
stop_in_loop = last_count;
|
||||
last_count = space / pLast->extent;
|
||||
last_count = space / end_loop->size;
|
||||
}
|
||||
for( i = 0; i < last_count; i++ ) {
|
||||
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, pLast->extent,
|
||||
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, end_loop->size,
|
||||
pConv->pBaseBuf, pData, pConv->count );
|
||||
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, pLast->extent );
|
||||
pSrcBuf += pLast->extent;
|
||||
lastDisp += pElems[pos_desc].extent;
|
||||
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, end_loop->size );
|
||||
pSrcBuf += end_loop->size;
|
||||
lastDisp += pElems[pos_desc].loop.extent;
|
||||
}
|
||||
space -= (pLast->extent * last_count);
|
||||
bConverted += (pLast->extent * last_count);
|
||||
space -= (end_loop->size * last_count);
|
||||
bConverted += (end_loop->size * last_count);
|
||||
if( stop_in_loop == 0 ) {
|
||||
pos_desc += pElems[pos_desc].disp + 1;
|
||||
last_count = pElems[pos_desc].count;
|
||||
pos_desc += pElems[pos_desc].loop.items + 1;
|
||||
last_count = pElems[pos_desc].elem.count;
|
||||
continue;
|
||||
}
|
||||
last_count = stop_in_loop - last_count;
|
||||
@ -242,20 +246,21 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
|
||||
/* Save the stack with the correct last_count value. */
|
||||
}
|
||||
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_count,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].disp );
|
||||
pStack->disp, pos_desc + pElems[pos_desc].loop.items );
|
||||
pos_desc++;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
last_count = pElems[pos_desc].count;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
|
||||
last_count = pElems[pos_desc].elem.count;
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
|
||||
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
|
||||
const ompi_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]);
|
||||
/* do we have enough space in the buffer ? */
|
||||
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
|
||||
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
|
||||
last_blength = last_count * basic_type->size;
|
||||
if( pElems[pos_desc].elem.common.flags & DT_FLAG_CONTIGUOUS ) {
|
||||
if( space < last_blength ) {
|
||||
last_blength = space / ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
|
||||
last_blength = space / basic_type->size;
|
||||
last_count -= last_blength;
|
||||
last_blength *= ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
|
||||
last_blength *= basic_type->size;
|
||||
space -= last_blength;
|
||||
goto end_loop; /* or break whatever but go out of this while */
|
||||
}
|
||||
@ -268,19 +273,19 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
|
||||
} else {
|
||||
uint32_t i;
|
||||
|
||||
last_blength = ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
|
||||
last_blength = basic_type->size;
|
||||
for( i = 0; i < last_count; i++ ) {
|
||||
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, last_blength,
|
||||
pConv->pBaseBuf, pData, pConv->count );
|
||||
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, last_blength );
|
||||
lastDisp += pElems[pos_desc].extent;
|
||||
pSrcBuf += ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
|
||||
lastDisp += pElems[pos_desc].elem.extent;
|
||||
pSrcBuf += basic_type->size;
|
||||
}
|
||||
bConverted += ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size * last_count;
|
||||
bConverted += basic_type->size * last_count;
|
||||
}
|
||||
pos_desc++; /* advance to the next data */
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
last_count = pElems[pos_desc].count;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
|
||||
last_count = pElems[pos_desc].elem.count;
|
||||
}
|
||||
}
|
||||
end_loop:
|
||||
@ -660,7 +665,6 @@ int32_t ompi_ddt_get_element_count( const ompi_datatype_t* datatype, int32_t iSi
|
||||
{
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
uint32_t pos_desc; /* actual position in the description of the derived datatype */
|
||||
int type; /* type at current position */
|
||||
int rc, nbElems = 0;
|
||||
int stack_pos = 0;
|
||||
|
||||
@ -677,7 +681,7 @@ int32_t ompi_ddt_get_element_count( const ompi_datatype_t* datatype, int32_t iSi
|
||||
pos_desc = 0;
|
||||
|
||||
while( 1 ) { /* loop forever; the exit condition is in the last section */
|
||||
if( datatype->desc.desc[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
|
||||
if( DT_END_LOOP == datatype->desc.desc[pos_desc].elem.common.type ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
stack_pos--;
|
||||
pStack--;
|
||||
@ -687,31 +691,32 @@ int32_t ompi_ddt_get_element_count( const ompi_datatype_t* datatype, int32_t iSi
|
||||
if( pStack->index == -1 ) {
|
||||
pStack->disp += (datatype->ub - datatype->lb);
|
||||
} else {
|
||||
pStack->disp += datatype->desc.desc[pos_desc].extent;
|
||||
assert( DT_LOOP == datatype->desc.desc[pos_desc].elem.common.type );
|
||||
pStack->disp += datatype->desc.desc[pos_desc].loop.extent;
|
||||
}
|
||||
pos_desc = pStack->index + 1;
|
||||
continue;
|
||||
}
|
||||
if( datatype->desc.desc[pos_desc].type == DT_LOOP ) {
|
||||
if( DT_LOOP == datatype->desc.desc[pos_desc].elem.common.type ) {
|
||||
do {
|
||||
PUSH_STACK( pStack, stack_pos, pos_desc, datatype->desc.desc[pos_desc].count,
|
||||
0, pos_desc + datatype->desc.desc[pos_desc].disp );
|
||||
PUSH_STACK( pStack, stack_pos, pos_desc, datatype->desc.desc[pos_desc].loop.loops,
|
||||
0, pos_desc + datatype->desc.desc[pos_desc].loop.items );
|
||||
pos_desc++;
|
||||
} while( datatype->desc.desc[pos_desc].type == DT_LOOP ); /* let's start another loop */
|
||||
} while( DT_LOOP == datatype->desc.desc[pos_desc].elem.common.type ); /* let's start another loop */
|
||||
DDT_DUMP_STACK( pStack, stack_pos, datatype->desc.desc, "advance loops" );
|
||||
continue;
|
||||
}
|
||||
while( datatype->desc.desc[pos_desc].flags & DT_FLAG_DATA ) {
|
||||
while( datatype->desc.desc[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
|
||||
/* now here we have a basic datatype */
|
||||
type = datatype->desc.desc[pos_desc].type;
|
||||
rc = datatype->desc.desc[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
|
||||
const ompi_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(datatype->desc.desc[pos_desc]);
|
||||
rc = datatype->desc.desc[pos_desc].elem.count * basic_type->size;
|
||||
if( rc >= iSize ) {
|
||||
rc = iSize / ompi_ddt_basicDatatypes[type]->size;
|
||||
rc = iSize / basic_type->size;
|
||||
nbElems += rc;
|
||||
iSize -= rc * ompi_ddt_basicDatatypes[type]->size;
|
||||
iSize -= rc * basic_type->size;
|
||||
return (iSize == 0 ? nbElems : -1);
|
||||
}
|
||||
nbElems += datatype->desc.desc[pos_desc].count;
|
||||
nbElems += datatype->desc.desc[pos_desc].elem.count;
|
||||
iSize -= rc;
|
||||
pos_desc++; /* advance to the next data */
|
||||
}
|
||||
@ -783,7 +788,7 @@ int32_t ompi_ddt_copy_content_same_ddt( const ompi_datatype_t* datatype, int32_t
|
||||
}
|
||||
|
||||
while( 1 ) {
|
||||
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
|
||||
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
pStack--;
|
||||
if( --stack_pos == -1 ) goto end_loop;
|
||||
@ -791,31 +796,33 @@ int32_t ompi_ddt_copy_content_same_ddt( const ompi_datatype_t* datatype, int32_t
|
||||
pos_desc++;
|
||||
} else {
|
||||
DDT_DUMP_STACK( pStack, stack_pos, pElems, "decrease loop count" );
|
||||
if( pStack->index == -1 )
|
||||
if( pStack->index == -1 ) {
|
||||
pStack->disp += (datatype->ub - datatype->lb);
|
||||
else
|
||||
pStack->disp += pElems[pStack->index].extent;
|
||||
} else {
|
||||
assert( DT_LOOP == pElems[pStack->index].elem.common.type );
|
||||
pStack->disp += pElems[pStack->index].loop.extent;
|
||||
}
|
||||
pos_desc = pStack->index + 1;
|
||||
}
|
||||
}
|
||||
if( pElems[pos_desc].type == DT_LOOP ) {
|
||||
if( DT_LOOP == pElems[pos_desc].elem.common.type ) {
|
||||
do {
|
||||
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].disp );
|
||||
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].loop.loops,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].loop.items );
|
||||
pos_desc++;
|
||||
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
|
||||
} while( DT_LOOP == pElems[pos_desc].elem.common.type ); /* let's start another loop */
|
||||
DDT_DUMP_STACK( pStack, stack_pos, pElems, "advance loops" );
|
||||
}
|
||||
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
|
||||
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
|
||||
/* now here we have a basic datatype */
|
||||
if( (lastDisp + lastLength) != (pStack->disp + pElems[pos_desc].disp) ) {
|
||||
if( (lastDisp + lastLength) != (pStack->disp + pElems[pos_desc].elem.disp) ) {
|
||||
OMPI_DDT_SAFEGUARD_POINTER( pDestBuf + lastDisp, lastLength,
|
||||
pDestBuf, datatype, count );
|
||||
MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength );
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
|
||||
lastLength = 0;
|
||||
}
|
||||
lastLength += pElems[pos_desc].count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
|
||||
lastLength += pElems[pos_desc].elem.count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
|
||||
pos_desc++; /* advance to the next data */
|
||||
}
|
||||
}
|
||||
|
@ -45,10 +45,9 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
|
||||
{
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int pos_desc; /* actual position in the description of the derived datatype */
|
||||
int type, lastLength = 0;
|
||||
int lastLength = 0, loop_length;
|
||||
ompi_datatype_t* pData = pConvertor->pDesc;
|
||||
int* remoteLength;
|
||||
int loop_length;
|
||||
int resting_place = starting_point;
|
||||
dt_elem_desc_t* pElems;
|
||||
size_t remote_size;
|
||||
@ -84,7 +83,7 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
|
||||
if( pConvertor->flags & CONVERTOR_HOMOGENEOUS ) {
|
||||
|
||||
loop_length = GET_FIRST_NON_LOOP( pElems );
|
||||
pStack->disp = pElems[loop_length].disp;
|
||||
pStack->disp = pElems[loop_length].elem.disp;
|
||||
|
||||
/* Special case for contiguous datatypes */
|
||||
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
@ -94,8 +93,8 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
|
||||
pStack->count -= cnt;
|
||||
cnt = starting_point - cnt * pData->size; /* number of bytes after the loop */
|
||||
pStack[1].index = 0;
|
||||
pStack[1].count = (pElems[loop_length].count *
|
||||
ompi_ddt_basicDatatypes[pElems[loop_length].type]->size) - cnt;
|
||||
pStack[1].count = (pElems[loop_length].elem.count *
|
||||
ompi_ddt_basicDatatypes[pElems[loop_length].elem.common.type]->size) - cnt;
|
||||
pStack[1].end_loop = pStack->end_loop;
|
||||
|
||||
if( (long)pData->size == extent ) { /* all elements are contiguous */
|
||||
@ -119,7 +118,7 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
|
||||
pConvertor->bConverted += (remote_size * count);
|
||||
|
||||
loop_length = GET_FIRST_NON_LOOP( pElems );
|
||||
pStack->disp = count * (pData->ub - pData->lb) + pElems[loop_length].disp;
|
||||
pStack->disp = count * (pData->ub - pData->lb) + pElems[loop_length].elem.disp;
|
||||
|
||||
pos_desc = 0;
|
||||
remoteLength = (int*)alloca( sizeof(int) * (pConvertor->pDesc->btypes[DT_LOOP] + 1));
|
||||
@ -131,8 +130,8 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
|
||||
next_loop:
|
||||
loop_length = remoteLength[pConvertor->stack_pos];
|
||||
while( pos_desc < pConvertor->pStack[0].end_loop ) { /* protect in case when the starting_pos is bigger than the total size */
|
||||
if( pElems->type == DT_END_LOOP ) { /* end of the current loop */
|
||||
dt_endloop_desc_t* end_loop = (dt_endloop_desc_t*)pElems;
|
||||
if( DT_END_LOOP == pElems->elem.common.type ) { /* end of the current loop */
|
||||
ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)pElems;
|
||||
long extent;
|
||||
|
||||
pStack->count--;
|
||||
@ -147,7 +146,8 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
|
||||
if( pStack->index == -1 ) {
|
||||
extent = pData->ub - pData->lb;
|
||||
} else {
|
||||
extent = ((dt_loop_desc_t*)(pElems - end_loop->items + 1))->extent;
|
||||
assert( DT_LOOP == pElems[1 - end_loop->items].loop.common.type );
|
||||
extent = ((ddt_loop_desc_t*)(pElems - end_loop->items + 1))->extent;
|
||||
}
|
||||
pStack->count -= cnt;
|
||||
resting_place -= cnt * loop_length;
|
||||
@ -174,27 +174,27 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
|
||||
pElems++;
|
||||
goto next_loop;
|
||||
}
|
||||
if( pElems->type == DT_LOOP ) {
|
||||
if( DT_LOOP == pElems->elem.common.type ) {
|
||||
remoteLength[pConvertor->stack_pos] += loop_length;
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
|
||||
pData->desc.desc[pos_desc].count,
|
||||
pStack->disp, pos_desc + pElems->disp );
|
||||
pElems->loop.loops, /*pData->desc.desc[pos_desc].loop.loops,*/
|
||||
pStack->disp, pos_desc + pElems->loop.items );
|
||||
remoteLength[pConvertor->stack_pos] = 0;
|
||||
pos_desc++;
|
||||
pElems++;
|
||||
loop_length = 0; /* starting a new loop */
|
||||
}
|
||||
while( pElems->flags & DT_FLAG_DATA ) {
|
||||
while( pElems->elem.common.flags & DT_FLAG_DATA ) {
|
||||
/* now here we have a basic datatype */
|
||||
type = pElems->type;
|
||||
lastLength = pElems->count * ompi_ddt_basicDatatypes[type]->size;
|
||||
const ompi_datatype_t* basic_type = BASIC_DDT_FROM_ELEM( (*pElems) );
|
||||
lastLength = pElems->elem.count * basic_type->size;
|
||||
if( resting_place < lastLength ) {
|
||||
int cnt = resting_place / ompi_ddt_basicDatatypes[type]->size;
|
||||
loop_length += cnt * ompi_ddt_basicDatatypes[type]->size;
|
||||
resting_place -= (cnt * ompi_ddt_basicDatatypes[type]->size);
|
||||
int cnt = resting_place / basic_type->size;
|
||||
loop_length += cnt * basic_type->size;
|
||||
resting_place -= (cnt * basic_type->size);
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
|
||||
pElems->count - cnt,
|
||||
pStack->disp + pElems->disp + cnt * pElems->extent,
|
||||
pElems->elem.count - cnt,
|
||||
pStack->disp + pElems->elem.disp + cnt * pElems->elem.extent,
|
||||
pos_desc );
|
||||
pConvertor->bConverted = starting_point - resting_place;
|
||||
return OMPI_SUCCESS;
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user