1
1

This looks like a lot of changes, but the logic is exactly the same. I mostly cleaned up
the whole datatype code, made it a little more readable, and added some
additional checks for correctness. At the same time I moved some internal structures
from the external .h include to the internal one.

ddt_test.c gets one more datatype to test. It looks like the ones used
in the BLACS test code.

This commit was SVN r5498.
Этот коммит содержится в:
George Bosilca 2005-04-24 20:08:45 +00:00
родитель 3c6fb03e9c
Коммит dbac158804
9 изменённых файлов: 634 добавлений и 433 удалений

Просмотреть файл

@ -76,21 +76,12 @@ OMPI_DECLSPEC extern ompi_pointer_array_t *ompi_datatype_f_to_c_table;
#define DT_FLAG_DATA_FORTRAN 0xC000
#define DT_FLAG_DATA_LANGUAGE 0xC000
/* the basic element. A data description is composed
* by a set of basic elements.
*/
typedef struct __dt_elem_desc {
uint16_t flags; /**< flags for the record */
uint16_t type; /**< the basic data type id */
uint32_t count; /**< number of elements */
long disp; /**< displacement of the first element */
int32_t extent; /**< extent of each element */
} dt_elem_desc_t;
typedef union dt_elem_desc dt_elem_desc_t;
typedef struct __dt_struct_desc {
uint32_t length; /* the maximum number of elements in the description array */
uint32_t used; /* the number of used elements in the description array */
dt_elem_desc_t* desc;
uint32_t length; /* the maximum number of elements in the description array */
uint32_t used; /* the number of used elements in the description array */
dt_elem_desc_t* desc;
} dt_type_desc_t;
/* the data description.
@ -204,6 +195,7 @@ typedef struct __dt_stack {
int32_t end_loop; /**< for loops the end of the loop, otherwise useless */
long disp; /**< actual displacement depending on the count field */
} dt_stack_t;
#define DT_STATIC_STACK_SIZE 5
struct ompi_convertor_t {
@ -211,6 +203,7 @@ struct ompi_convertor_t {
uint32_t remoteArch; /**< the remote architecture */
uint32_t flags; /**< the properties of this convertor */
ompi_datatype_t* pDesc; /**< the datatype description associated with the convertor */
dt_type_desc_t* use_desc; /**< the datatype version used by the convertor (normal or optimized) */
uint32_t count; /**< the total number of full datatype elements */
char* pBaseBuf; /**< initial buffer as supplied by the user */
dt_stack_t* pStack; /**< the local stack for the actual conversion */

Просмотреть файл

@ -110,24 +110,44 @@ static inline void DUMP( char* fmt, ... )
extern "C" {
#endif
/* These 2 typedefs are the same as the dt_elem_desc_t except
* for the name of the fields.
*/
typedef struct __dt_loop_desc {
uint16_t flags; /**< flags for the record */
uint16_t type; /**< the basic data type id */
uint32_t loops; /**< number of times the loop have to be done */
long items; /**< number of items in the loop */
uint32_t extent; /**< extent of the whole loop */
} dt_loop_desc_t;
/* Header common to every entry of a datatype description. It is the first
 * member of ddt_elem_desc, ddt_loop_desc and ddt_endloop_desc, so the flags
 * and the type id sit at the same place whichever of the three is stored.
 */
struct ddt_elem_id_description {
uint16_t flags; /**< flags for the record */
uint16_t type; /**< the basic data type id */
};
typedef struct ddt_elem_id_description ddt_elem_id_description;
typedef struct __dt_endloop_desc {
uint16_t flags; /**< flags for the record */
uint16_t type; /**< the basic data type id */
uint32_t items; /**< number of items in the loop */
long total_extent; /**< total extent of the loop taking in account the repetitions */
uint32_t size; /**< real size of the data in the loop */
} dt_endloop_desc_t;
/* The basic element. A data description is composed
 * by a set of basic elements. This is the variant describing actual data:
 * a run of `count` elements starting at displacement `disp`.
 */
struct ddt_elem_desc {
ddt_elem_id_description common; /**< basic data description and flags */
uint32_t count; /**< number of elements */
long disp; /**< displacement of the first element */
int32_t extent; /**< extent of each element */
};
typedef struct ddt_elem_desc ddt_elem_desc_t;
/* Description entry marking the start of a loop (common.type == DT_LOOP).
 * Field sizes and order mirror ddt_elem_desc; only the names differ.
 */
struct ddt_loop_desc {
ddt_elem_id_description common; /**< basic data description and flags */
uint32_t loops; /**< number of times the loop has to be done */
long items; /**< number of items in the loop */
uint32_t extent; /**< extent of the whole loop */
};
typedef struct ddt_loop_desc ddt_loop_desc_t;
/* Description entry marking the end of a loop (common.type == DT_END_LOOP).
 * Field sizes and order mirror ddt_elem_desc; only the names differ.
 */
struct ddt_endloop_desc {
ddt_elem_id_description common; /**< basic data description and flags */
uint32_t items; /**< number of items in the loop */
long total_extent; /**< total extent of the loop taking in account the repetitions */
uint32_t size; /**< real size of the data in the loop */
};
typedef struct ddt_endloop_desc ddt_endloop_desc_t;
/* One slot of a datatype description array. All three members start with the
 * same ddt_elem_id_description header; users test elem.common.type against
 * DT_LOOP / DT_END_LOOP to decide whether to read the slot through `loop`,
 * `end_loop` or `elem`.
 */
union dt_elem_desc {
ddt_elem_desc_t elem;
ddt_loop_desc_t loop;
ddt_endloop_desc_t end_loop;
};
/* keep the last 16 bits free for data flags */
#define CONVERTOR_USELESS 0x00010000
@ -155,26 +175,7 @@ typedef struct {
} ompi_complex_long_double_t;
extern const ompi_datatype_t* ompi_ddt_basicDatatypes[];
/* macros to play with the flags */
#define SWAP( INT_VALUE, FLAG ) (INT_VALUE) = (INT_VALUE) ^ (FLAG)
#define SET_FLAG( INT_VALUE, FLAG ) (INT_VALUE) = (INT_VALUE) | (FLAG)
#define UNSET_FLAG( INT_VALUE, FLAG) (INT_VALUE) = (INT_VALUE) & (~(FLAG))
#define SET_CONTIGUOUS_FLAG( INT_VALUE ) SET_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS)
#define UNSET_CONTIGUOUS_FLAG( INT_VALUE ) UNSET_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS)
/* Min/max helpers for long (LMIN/LMAX) and int (IMIN/IMAX).
 * With GNU C in non-ISO mode they are statement-expression macros that
 * evaluate each argument exactly once; otherwise they are inline functions.
 * Each statement expression must end with an expression statement
 * (note the ';' before the closing brace) to yield a value.
 */
#if defined(__GNUC__) && !defined(__STDC__)
#define LMAX(A,B) ({ long _a = (A), _b = (B); (_a < _b ? _b : _a); })
#define LMIN(A,B) ({ long _a = (A), _b = (B); (_a < _b ? _a : _b); })
#define IMAX(A,B) ({ int _a = (A), _b = (B); (_a < _b ? _b : _a); })
#define IMIN(A,B) ({ int _a = (A), _b = (B); (_a < _b ? _a : _b); })
#else
static inline long LMAX( long a, long b ) { return ( a < b ? b : a ); }
static inline long LMIN( long a, long b ) { return ( a < b ? a : b ); }
static inline int IMAX( int a, int b ) { return ( a < b ? b : a ); }
static inline int IMIN( int a, int b ) { return ( a < b ? a : b ); }
#endif /* defined(__GNUC__) && !defined(__STDC__) */
/* Look up the predefined datatype of a description entry by indexing
 * ompi_ddt_basicDatatypes with the entry's elem.common.type id.
 * ELEM is a dt_elem_desc_t value (not a pointer).
 */
#define BASIC_DDT_FROM_ELEM( ELEM ) (ompi_ddt_basicDatatypes[(ELEM).elem.common.type])
extern conversion_fct_t ompi_ddt_copy_functions[DT_MAX_PREDEFINED];
extern int32_t ompi_ddt_external32_init( void );
@ -270,7 +271,7 @@ static inline int GET_FIRST_NON_LOOP( const dt_elem_desc_t* _pElem )
/* We dont have to check for the end as we always put an END_LOOP
* at the end of all datatype descriptions.
*/
while( _pElem->type == DT_LOOP ) {
while( _pElem->elem.common.type == DT_LOOP ) {
++_pElem; index++;
}
return index;
@ -359,16 +360,14 @@ int ompi_convertor_create_stack_at_begining( ompi_convertor_t* pConvertor, const
* the entries on the stack ? Should I stop when I reach the first data element or
* should I stop on the first contiguous loop ?
*/
while( pElems[index].type == DT_LOOP ) {
dt_loop_desc_t* loop = (dt_loop_desc_t*)&(pElems[index]);
while( pElems[index].elem.common.type == DT_LOOP ) {
PUSH_STACK( pStack, pConvertor->stack_pos, index,
loop->loops, 0, loop->items );
pElems[index].loop.loops, 0, pElems[index].loop.items );
index++;
}
if( pElems[index].flags & DT_FLAG_DATA ) { /* let's stop here */
if( pElems[index].elem.common.flags & DT_FLAG_DATA ) { /* let's stop here */
PUSH_STACK( pStack, pConvertor->stack_pos, index,
pElems[index].count, pElems[index].disp, 0 );
pElems[index].elem.count, pElems[index].elem.disp, 0 );
} else {
ompi_output( 0, "Here we should have a data in the datatype description\n" );
}

Просмотреть файл

@ -485,6 +485,74 @@ ompi_datatype_t* test_struct_char_double( void )
return pdt;
}
/* Create, commit and dump a vector datatype built on ompi_mpi_double with the
 * arguments (2, 2, 5) -- presumably count, block length and stride, as in
 * MPI_Type_vector (TODO confirm). The caller owns the returned datatype.
 */
ompi_datatype_t* test_create_twice_two_doubles( void )
{
    ompi_datatype_t* vector_type;

    ompi_ddt_create_vector( 2, 2, 5, &ompi_mpi_double, &vector_type );
    ompi_ddt_commit( &vector_type );
    ompi_ddt_dump( vector_type );

    return vector_type;
}
/*
Datatype 0x832cf28 size 0 align 1 id 0 length 4 used 0
true_lb 0 true_ub 0 (true_extent 0) lb 0 ub 0 (extent 0)
nbElems 0 loops 0 flags 6 (commited contiguous )-cC--------[---][---]
contain 13 disp 0x420 (1056) extent 4
--C-----D*-[ C ][INT] MPI_INT count 13 disp 0x478 (1144) extent 4
--C-----D*-[ C ][INT] MPI_INT count 13 disp 0x4d0 (1232) extent 4
--C-----D*-[ C ][INT] MPI_INT count 13 disp 0x528 (1320) extent 4
--C-----D*-[ C ][INT] MPI_INT count 13 disp 0x580 (1408) extent 4
--C-----D*-[ C ][INT] MPI_INT count 13 disp 0x5d8 (1496) extent 4
--C-----D*-[ C ][INT] MPI_INT count 13 disp 0x630 (1584) extent 4
--C-----D*-[ C ][INT] MPI_INT count 12 disp 0x68c (1676) extent 4
--C-----D*-[ C ][INT] MPI_INT count 11 disp 0x6e8 (1768) extent 4
--C-----D*-[ C ][INT] MPI_INT count 10 disp 0x744 (1860) extent 4
--C-----D*-[ C ][INT] MPI_INT count 9 disp 0x7a0 (1952) extent 4
--C-----D*-[ C ][INT] MPI_INT count 8 disp 0x7fc (2044) extent 4
--C-----D*-[ C ][INT] MPI_INT count 7 disp 0x858 (2136) extent 4
--C-----D*-[ C ][INT] MPI_INT count 6 disp 0x8b4 (2228) extent 4
--C-----D*-[ C ][INT] MPI_INT count 5 disp 0x910 (2320) extent 4
--C-----D*-[ C ][INT] MPI_INT count 4 disp 0x96c (2412) extent 4
--C-----D*-[ C ][INT] MPI_INT count 3 disp 0x9c8 (2504) extent 4
--C-----D*-[ C ][INT] MPI_INT count 2 disp 0xa24 (2596) extent 4
--C-----D*-[ C ][INT] MPI_INT count 1 disp 0xa80 (2688) extent 4
*/
/* Block lengths of the BLACS-like indexed datatype (one entry per block). */
static int blacs_length[] = { 13, 13, 13, 13, 13, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 };
/* Matching block displacements: the byte offsets of the dump above divided
 * by 4 (presumably sizeof(int) -- TODO confirm).
 */
static int blacs_indices[] = { 1144/4, 1232/4, 1320/4, 1408/4, 1496/4, 1584/4, 1676/4, 1768/4,
                               1860/4, 1952/4, 2044/4, 2136/4, 2228/4, 2320/4, 2412/4, 2504/4,
                               2596/4, 2688/4 };

/* Create, commit and dump an indexed datatype of ints described by
 * blacs_length / blacs_indices. The caller owns the returned datatype.
 */
ompi_datatype_t* test_create_blacs_type( void )
{
    /* one block per entry of blacs_length (18 entries) */
    const int block_count = (int)(sizeof(blacs_length) / sizeof(blacs_length[0]));
    ompi_datatype_t *indexed_type;

    ompi_ddt_create_indexed( block_count, blacs_length, blacs_indices, &ompi_mpi_int, &indexed_type );
    ompi_ddt_commit( &indexed_type );
    ompi_ddt_dump( indexed_type );

    return indexed_type;
}
/* Create, commit and dump a vector datatype over base_type with the arguments
 * (7, 1, 3) -- presumably count, block length and stride (TODO confirm).
 * The caller owns the returned datatype.
 */
ompi_datatype_t* test_create_blacs_type1( ompi_datatype_t* base_type )
{
    ompi_datatype_t *strided_type;

    ompi_ddt_create_vector( 7, 1, 3, base_type, &strided_type );
    ompi_ddt_commit( &strided_type );
    ompi_ddt_dump( strided_type );

    return strided_type;
}
/* Create, commit and dump a vector datatype over base_type with the arguments
 * (7, 1, 2) -- presumably count, block length and stride (TODO confirm).
 * The caller owns the returned datatype.
 */
ompi_datatype_t* test_create_blacs_type2( ompi_datatype_t* base_type )
{
    ompi_datatype_t *strided_type;

    ompi_ddt_create_vector( 7, 1, 2, base_type, &strided_type );
    ompi_ddt_commit( &strided_type );
    ompi_ddt_dump( strided_type );

    return strided_type;
}
ompi_datatype_t* test_struct( void )
{
ompi_datatype_t* types[] = { &ompi_mpi_float /* ompi_ddt_basicDatatypes[DT_FLOAT] */,
@ -590,6 +658,110 @@ int local_copy_ddt_count( ompi_datatype_t* pdt, int count )
return OMPI_SUCCESS;
}
/* Print up to 14 ints of buf as 7 rows of 2 columns: row j, column i shows
 * element i*7+j. Elements that would lie beyond buf_bytes are skipped so the
 * dump never reads outside the buffer.
 */
static void dump_ints_7x2( const void* buf, size_t buf_bytes )
{
    const int* values = (const int*)buf;
    const size_t available = buf_bytes / sizeof(int);
    size_t row, col;

    for( row = 0; row < 7; row++ ) {
        for( col = 0; col < 2; col++ ) {
            const size_t pos = col * 7 + row;
            if( pos < available ) {
                printf( "%08x ", (unsigned int)values[pos] );
            }
        }
        printf( "\n" );
    }
}

/* Copy send_count elements of send_type into recv_count elements of recv_type
 * by packing with one convertor and unpacking with another, going through a
 * temporary buffer of `chunk` bytes at a time. The destination buffer is
 * dumped before and after the copy, and the start of every packed chunk is
 * dumped as well.
 *
 * send_type, recv_type  datatypes handed to the convertors (a failed convertor
 *                       init is reported with a hint to check they are committed)
 * send_count, recv_count number of elements of each type
 * chunk                 size in bytes of the intermediate pack buffer
 *
 * Always returns OMPI_SUCCESS; failures are only reported on stdout.
 */
int local_copy_with_convertor_2datatypes( ompi_datatype_t* send_type, int send_count,
                                          ompi_datatype_t* recv_type, int recv_count,
                                          int chunk )
{
    long send_extent, recv_extent, send_bytes, recv_bytes, pos;
    void *pdst = NULL, *psrc = NULL, *ptemp = NULL;
    ompi_convertor_t *pSendConvertor = NULL, *pRecvConvertor = NULL;
    struct iovec iov;
    uint32_t iov_count, max_data;
    int32_t free_after = 0, done1 = 0, done2 = 0;
    size_t packed_ints;

    ompi_ddt_type_extent( send_type, &send_extent );
    ompi_ddt_type_extent( recv_type, &recv_extent );
    send_bytes = send_extent * send_count;
    recv_bytes = recv_extent * recv_count;

    pdst  = malloc( recv_bytes );
    psrc  = malloc( send_bytes );
    ptemp = malloc( chunk );
    if( (NULL == pdst) || (NULL == psrc) || (NULL == ptemp) ) {
        printf( "Unable to allocate the buffers for the copy test\n" );
        goto clean_and_return;
    }

    /* fill the source with a recognizable pattern ... */
    for( pos = 0; pos < send_bytes; pos++ ) {
        ((char*)psrc)[pos] = (char)(pos % 128 + 32);
    }
    /* ... and the receiver with ZEROS (memset takes the value before the size) */
    memset( pdst, 0, recv_bytes );

    pSendConvertor = ompi_convertor_create( 0, 0 );
    if( OMPI_SUCCESS != ompi_convertor_init_for_send( pSendConvertor, 0, send_type, send_count, psrc, 0, NULL ) ) {
        printf( "Unable to create the send convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    pRecvConvertor = ompi_convertor_create( 0, 0 );
    if( OMPI_SUCCESS != ompi_convertor_init_for_recv( pRecvConvertor, 0, recv_type, recv_count, pdst, 0, NULL ) ) {
        printf( "Unable to create the recv convertor. Is the datatype committed ?\n" );
        goto clean_and_return;
    }

    /* Initial destination */
    dump_ints_7x2( pdst, (size_t)recv_bytes );

    /* never dump more ints than the temporary buffer can hold */
    packed_ints = (chunk > 0) ? ((size_t)chunk / sizeof(int)) : 0;
    if( packed_ints > 7 ) packed_ints = 7;

    while( (done1 & done2) != 1 ) {
        /* They are supposed to finish in exactly the same time. */
        if( done1 | done2 ) {
            printf( "WRONG !!! the send is %d but the receive is %d\n", done1, done2 );
        }

        max_data = chunk;
        iov_count = 1;
        iov.iov_base = ptemp;
        iov.iov_len = chunk;

        if( done1 == 0 ) {
            size_t k;

            done1 = ompi_convertor_pack( pSendConvertor, &iov, &iov_count, &max_data, &free_after );
            assert( free_after == 0 );
            if( 1 == done1 ) {
                printf( "pack finished\n" );
            }
            for( k = 0; k < packed_ints; k++ ) {
                printf( "%x\n", (unsigned int)((int*)ptemp)[k] );
            }
        }

        if( done2 == 0 ) {
            done2 = ompi_convertor_unpack( pRecvConvertor, &iov, &iov_count, &max_data, &free_after );
            assert( free_after == 0 );
            if( 1 == done2 ) {
                printf( "unpack finished\n" );
            }
        }
    }

    /* final destination */
    dump_ints_7x2( pdst, (size_t)recv_bytes );

 clean_and_return:
    if( pSendConvertor != NULL ) {
        OBJ_RELEASE( pSendConvertor ); assert( pSendConvertor == NULL );
    }
    if( pRecvConvertor != NULL ) {
        OBJ_RELEASE( pRecvConvertor ); assert( pRecvConvertor == NULL );
    }
    /* free(NULL) is a no-op, so no guards are needed */
    free( pdst );
    free( psrc );
    free( ptemp );
    return OMPI_SUCCESS;
}
int local_copy_with_convertor( ompi_datatype_t* pdt, int count, int chunk )
{
long extent;
@ -645,7 +817,6 @@ int local_copy_with_convertor( ompi_datatype_t* pdt, int count, int chunk )
length += max_data;
}
clean_and_return:
if( pSendConvertor != NULL ) {
OBJ_RELEASE( pSendConvertor ); assert( pSendConvertor == NULL );
@ -666,7 +837,7 @@ int main( int argc, char* argv[] )
ompi_ddt_init();
pdt = create_strange_dt();
/* pdt = create_strange_dt();
local_copy_ddt_count(pdt, 1);
local_copy_with_convertor(pdt, 1, 4008);
OBJ_RELEASE( pdt ); assert( pdt == NULL );
@ -719,12 +890,26 @@ int main( int argc, char* argv[] )
OBJ_RELEASE( pdt1 ); assert( pdt1 == NULL );
OBJ_RELEASE( pdt2 ); assert( pdt2 == NULL );
OBJ_RELEASE( pdt3 ); /*assert( pdt3 == NULL );*/
OBJ_RELEASE( pdt3 ); *//*assert( pdt3 == NULL );*/
pdt = test_struct_char_double();
/*pdt = test_struct_char_double();
local_copy_with_convertor( pdt, 4500, 12 );
OBJ_RELEASE( pdt ); assert( pdt == NULL );*/
/*pdt = test_create_twice_two_doubles();
local_copy_with_convertor( pdt, 4500, 12 );
OBJ_RELEASE( pdt ); assert( pdt == NULL );
pdt = test_create_blacs_type();
local_copy_with_convertor( pdt, 4500, 1023 );
OBJ_RELEASE( pdt ); assert( pdt == NULL );*/
pdt1 = test_create_blacs_type1( &ompi_mpi_int );
pdt2 = test_create_blacs_type2( &ompi_mpi_int );
local_copy_with_convertor_2datatypes( pdt1, 1, pdt2, 1, 100 );
OBJ_RELEASE( pdt1 ); assert( pdt1 == NULL );
OBJ_RELEASE( pdt2 ); assert( pdt2 == NULL );
/* clean-ups all data allocations */
ompi_ddt_finalize();

Просмотреть файл

@ -19,6 +19,22 @@
#include "datatype/datatype.h"
#include "datatype/datatype_internal.h"
/* Macros to play with the flags: set / clear DT_FLAG_CONTIGUOUS in INT_VALUE.
 * The whole expansion is parenthesized so the macros behave as a single
 * expression wherever they are used.
 */
#define SET_CONTIGUOUS_FLAG( INT_VALUE ) ((INT_VALUE) = (INT_VALUE) | (DT_FLAG_CONTIGUOUS))
#define UNSET_CONTIGUOUS_FLAG( INT_VALUE ) ((INT_VALUE) = (INT_VALUE) & (~(DT_FLAG_CONTIGUOUS)))
/* Min/max helpers for long (LMIN/LMAX) and int (IMIN/IMAX).
 * With GNU C in non-ISO mode they are statement-expression macros that
 * evaluate each argument exactly once; otherwise they are inline functions.
 * Each statement expression must end with an expression statement
 * (note the ';' before the closing brace) to yield a value.
 */
#if defined(__GNUC__) && !defined(__STDC__)
#define LMAX(A,B) ({ long _a = (A), _b = (B); (_a < _b ? _b : _a); })
#define LMIN(A,B) ({ long _a = (A), _b = (B); (_a < _b ? _a : _b); })
#define IMAX(A,B) ({ int _a = (A), _b = (B); (_a < _b ? _b : _a); })
#define IMIN(A,B) ({ int _a = (A), _b = (B); (_a < _b ? _a : _b); })
#else
static inline long LMAX( long a, long b ) { return ( a < b ? b : a ); }
static inline long LMIN( long a, long b ) { return ( a < b ? a : b ); }
static inline int IMAX( int a, int b ) { return ( a < b ? b : a ); }
static inline int IMIN( int a, int b ) { return ( a < b ? a : b ); }
#endif /* defined(__GNUC__) && !defined(__STDC__) */
/* When we add a datatype we should update it's definition depending on
* the initial displacement for the whole data, so the displacement of
* all elements inside a datatype depend only on the loop displacement
@ -105,15 +121,15 @@ int32_t ompi_ddt_add( ompi_datatype_t* pdtBase, const ompi_datatype_t* pdtAdd,
}
pLast = &(pdtBase->desc.desc[pdtBase->desc.used]);
if( (pdtAdd->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) { /* add a basic datatype */
pLast->type = pdtAdd->id;
pLast->count = count;
pLast->disp = disp;
pLast->extent = extent;
pLast->elem.common.type = pdtAdd->id;
pLast->elem.count = count;
pLast->elem.disp = disp;
pLast->elem.extent = extent;
pdtBase->desc.used++;
pdtBase->btypes[pdtAdd->id] += count;
pLast->flags = pdtAdd->flags & ~(DT_FLAG_FOREVER | DT_FLAG_COMMITED | DT_FLAG_CONTIGUOUS);
pLast->elem.common.flags = pdtAdd->flags & ~(DT_FLAG_FOREVER | DT_FLAG_COMMITED | DT_FLAG_CONTIGUOUS);
if( extent == (int)pdtAdd->size )
pLast->flags |= DT_FLAG_CONTIGUOUS;
pLast->elem.common.flags |= DT_FLAG_CONTIGUOUS;
} else {
/* We handle a user defined datatype. We should make sure that the user will not have the
* oportunity to destroy it before all datatypes derived are destroyed. As we keep pointers
@ -136,11 +152,11 @@ int32_t ompi_ddt_add( ompi_datatype_t* pdtBase, const ompi_datatype_t* pdtAdd,
*/
if( count != 1 ) {
pLoop = pLast;
pLast->type = DT_LOOP;
pLast->count = count;
pLast->disp = (long)pdtAdd->desc.used + 1;
pLast->extent = extent;
pLast->flags = (pdtAdd->flags & ~(DT_FLAG_COMMITED | DT_FLAG_FOREVER));
pLast->loop.common.type = DT_LOOP;
pLast->loop.loops = count;
pLast->loop.items = (long)pdtAdd->desc.used + 1;
pLast->loop.extent = extent;
pLast->loop.common.flags = (pdtAdd->flags & ~(DT_FLAG_COMMITED | DT_FLAG_FOREVER));
localFlags = DT_FLAG_IN_LOOP;
pdtBase->btypes[DT_LOOP] += 2;
pdtBase->desc.used += 2;
@ -148,23 +164,20 @@ int32_t ompi_ddt_add( ompi_datatype_t* pdtBase, const ompi_datatype_t* pdtAdd,
}
for( i = 0; i < pdtAdd->desc.used; i++ ) {
pLast->type = pdtAdd->desc.desc[i].type;
pLast->flags = pdtAdd->desc.desc[i].flags | localFlags;
pLast->count = pdtAdd->desc.desc[i].count;
pLast->extent = pdtAdd->desc.desc[i].extent;
pLast->disp = pdtAdd->desc.desc[i].disp;
if( pdtAdd->desc.desc[i].type != DT_LOOP )
pLast->disp += disp /* + pdtAdd->lb */;
pLast->elem = pdtAdd->desc.desc[i].elem;
pLast->elem.common.flags = pdtAdd->desc.desc[i].elem.common.flags | localFlags;
if( DT_LOOP != pdtAdd->desc.desc[i].elem.common.type )
pLast->elem.disp += disp /* + pdtAdd->lb */;
pLast++;
}
pdtBase->desc.used += pdtAdd->desc.used;
if( pLoop != NULL ) {
pLast->type = DT_END_LOOP;
pLast->count = pdtAdd->desc.used + 1; /* where the loop start */
pLast->disp = disp + (count - 1) * extent
+ (pdtAdd->true_ub - pdtAdd->true_lb) ; /* the final extent for the loop */
pLast->extent = pdtAdd->size; /* the size of the data inside the loop */
pLast->flags = pLoop->flags;
pLast->end_loop.common.type = DT_END_LOOP;
pLast->end_loop.items = pdtAdd->desc.used + 1; /* where the loop start */
pLast->end_loop.total_extent = disp + (count - 1) * extent +
(pdtAdd->true_ub - pdtAdd->true_lb) ; /* the final extent for the loop */
pLast->end_loop.size = pdtAdd->size; /* the size of the data inside the loop */
pLast->end_loop.common.flags = pLoop->loop.common.flags;
}
/* should I add some space until the extent of this datatype ? */
}

Просмотреть файл

@ -287,17 +287,17 @@ int32_t ompi_ddt_init( void )
ompi_datatype_t* datatype = (ompi_datatype_t*)ompi_ddt_basicDatatypes[i];
datatype->desc.desc = (dt_elem_desc_t*)malloc(2*sizeof(dt_elem_desc_t));
datatype->desc.desc[0].flags = DT_FLAG_BASIC | DT_FLAG_CONTIGUOUS | DT_FLAG_DATA;
datatype->desc.desc[0].type = i;
datatype->desc.desc[0].count = 1;
datatype->desc.desc[0].disp = 0;
datatype->desc.desc[0].extent = datatype->size;
datatype->desc.desc[0].elem.common.flags = DT_FLAG_BASIC | DT_FLAG_CONTIGUOUS | DT_FLAG_DATA;
datatype->desc.desc[0].elem.common.type = i;
datatype->desc.desc[0].elem.count = 1;
datatype->desc.desc[0].elem.disp = 0;
datatype->desc.desc[0].elem.extent = datatype->size;
datatype->desc.desc[1].flags = 0;
datatype->desc.desc[1].type = DT_END_LOOP;
datatype->desc.desc[1].count = 1;
datatype->desc.desc[1].disp = datatype->ub - datatype->lb;
datatype->desc.desc[1].extent = datatype->size;
datatype->desc.desc[1].elem.common.flags = 0;
datatype->desc.desc[1].elem.common.type = DT_END_LOOP;
datatype->desc.desc[1].elem.count = 1;
datatype->desc.desc[1].elem.disp = datatype->ub - datatype->lb;
datatype->desc.desc[1].elem.extent = datatype->size;
datatype->desc.length = 1;
datatype->desc.used = 1;
@ -543,19 +543,20 @@ static int __dump_data_desc( dt_elem_desc_t* pDesc, int nbElems, char* ptr )
int i, index = 0;
for( i = 0; i < nbElems; i++ ) {
index += _dump_data_flags( pDesc->flags, ptr + index );
if( pDesc->type == DT_LOOP )
index += sprintf( ptr + index, "%15s %d times the next %d elements extent %d\n",
ompi_ddt_basicDatatypes[pDesc->type]->name,
(int)pDesc->count, (int)pDesc->disp, (int)pDesc->extent );
else if( pDesc->type == DT_END_LOOP )
index += sprintf( ptr + index, "%15s prev %d elements total true extent %d size of data %d\n",
ompi_ddt_basicDatatypes[pDesc->type]->name,
(int)pDesc->count, (int)pDesc->disp, (int)pDesc->extent );
index += _dump_data_flags( pDesc->elem.common.flags, ptr + index );
index += sprintf( ptr + index, "%15s ", ompi_ddt_basicDatatypes[pDesc->elem.common.type]->name );
if( DT_LOOP == pDesc->elem.common.type )
index += sprintf( ptr + index, "%d times the next %d elements extent %d\n",
(int)pDesc->loop.loops, (int)pDesc->loop.items,
(int)pDesc->loop.extent );
else if( DT_END_LOOP == pDesc->elem.common.type )
index += sprintf( ptr + index, "prev %d elements total true extent %d size of data %d\n",
(int)pDesc->end_loop.items, (int)pDesc->end_loop.total_extent,
(int)pDesc->end_loop.size );
else
index += sprintf( ptr + index, "%15s count %d disp 0x%lx (%ld) extent %d\n",
ompi_ddt_basicDatatypes[pDesc->type]->name,
(int)pDesc->count, pDesc->disp, pDesc->disp, (int)pDesc->extent );
index += sprintf( ptr + index, "count %d disp 0x%lx (%ld) extent %d\n",
(int)pDesc->elem.count, pDesc->elem.disp, pDesc->elem.disp,
(int)pDesc->elem.extent );
pDesc++;
}
return index;

Просмотреть файл

@ -26,22 +26,22 @@
#define SAVE_DESC( PELEM, DISP, COUNT, EXTENT ) \
do { \
(PELEM)->flags = DT_FLAG_BASIC | DT_FLAG_DATA; \
(PELEM)->type = DT_BYTE; \
(PELEM)->count = (COUNT); \
(PELEM)->disp = (DISP); \
(PELEM)->extent = (EXTENT); \
(PELEM)->elem.common.flags = DT_FLAG_BASIC | DT_FLAG_DATA; \
(PELEM)->elem.common.type = DT_BYTE; \
(PELEM)->elem.count = (COUNT); \
(PELEM)->elem.disp = (DISP); \
(PELEM)->elem.extent = (EXTENT); \
(PELEM)++; \
nbElems++; \
} while(0)
#define SAVE_ELEM( PELEM, TYPE, FLAGS, COUNT, DISP, EXTENT ) \
do { \
(PELEM)->flags = (FLAGS); \
(PELEM)->type = (TYPE); \
(PELEM)->count = (COUNT); \
(PELEM)->disp = (DISP); \
(PELEM)->extent = (EXTENT); \
(PELEM)->elem.common.flags = (FLAGS); \
(PELEM)->elem.common.type = (TYPE); \
(PELEM)->elem.count = (COUNT); \
(PELEM)->elem.disp = (DISP); \
(PELEM)->elem.extent = (EXTENT); \
(PELEM)++; \
nbElems++; \
} while(0)
@ -72,33 +72,32 @@ int32_t ompi_ddt_optimize_short( ompi_datatype_t* pData, int32_t count,
totalDisp = 0;
while( stack_pos >= 0 ) {
if( pData->desc.desc[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
dt_loop_desc_t* pStartLoop;
if( DT_END_LOOP == pData->desc.desc[pos_desc].elem.common.type ) { /* end of the current loop */
if( lastLength != 0 ) {
SAVE_DESC( pElemDesc, lastDisp, lastLength, lastExtent );
lastDisp += lastLength;
lastLength = 0;
}
SAVE_ELEM( pElemDesc, DT_END_LOOP, pData->desc.desc[pos_desc].flags,
SAVE_ELEM( pElemDesc, DT_END_LOOP, pData->desc.desc[pos_desc].elem.common.flags,
nbElems - pStack->index + 1, /* # of elems in this loop */
pData->desc.desc[pos_desc].disp,
pData->desc.desc[pos_desc].extent );
pData->desc.desc[pos_desc].elem.disp,
pData->desc.desc[pos_desc].elem.extent );
if( --stack_pos >= 0 ) { /* still something to do ? */
pStartLoop = (dt_loop_desc_t*)&(pTypeDesc->desc[pStack->index - 1]);
pStartLoop->items = (pElemDesc - 1)->count;
ddt_loop_desc_t* pStartLoop = &(pTypeDesc->desc[pStack->index - 1].loop);
pStartLoop->items = (pElemDesc - 1)->elem.count;
totalDisp = pStack->disp; /* update the displacement position */
}
pStack--; /* go down one position on the stack */
pos_desc++;
continue;
}
if( pData->desc.desc[pos_desc].type == DT_LOOP ) {
dt_loop_desc_t* loop = (dt_loop_desc_t*)&(pData->desc.desc[pos_desc]);
dt_endloop_desc_t* end_loop = (dt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]);
if( DT_LOOP == pData->desc.desc[pos_desc].elem.common.type ) {
ddt_loop_desc_t* loop = (ddt_loop_desc_t*)&(pData->desc.desc[pos_desc]);
ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]);
int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) );
long loop_disp = pData->desc.desc[pos_desc + index].disp;
long loop_disp = pData->desc.desc[pos_desc + index].elem.disp;
if( loop->flags & DT_FLAG_CONTIGUOUS ) {
if( loop->common.flags & DT_FLAG_CONTIGUOUS ) {
/* the loop is contiguous or composed by contiguous elements with a gap */
if( loop->extent == end_loop->size ) {
/* the whole loop is contiguous */
@ -123,13 +122,13 @@ int32_t ompi_ddt_optimize_short( ompi_datatype_t* pData, int32_t count,
/* we have a gap in the begining or the end of the loop but the whole
* loop can be merged in just one memcpy.
*/
SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags,
counter, (long)2, pData->desc.desc[pos_desc].extent );
SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].elem.common.flags,
counter, (long)2, pData->desc.desc[pos_desc].elem.extent );
SAVE_DESC( pElemDesc, loop_disp, end_loop->size, lastExtent );
SAVE_ELEM( pElemDesc, DT_END_LOOP, end_loop->flags,
SAVE_ELEM( pElemDesc, DT_END_LOOP, end_loop->common.flags,
2, end_loop->total_extent, end_loop->size );
}
pos_desc += pData->desc.desc[pos_desc].disp + 1;
pos_desc += pData->desc.desc[pos_desc].loop.items + 1;
changes++;
} else {
if( lastLength != 0 ) {
@ -137,29 +136,29 @@ int32_t ompi_ddt_optimize_short( ompi_datatype_t* pData, int32_t count,
lastDisp += lastLength;
lastLength = 0;
}
SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags,
pData->desc.desc[pos_desc].count, (long)nbElems,
pData->desc.desc[pos_desc].extent );
PUSH_STACK( pStack, stack_pos, nbElems, pData->desc.desc[pos_desc].count,
totalDisp, pos_desc + pData->desc.desc[pos_desc].disp );
SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].elem.common.flags,
pData->desc.desc[pos_desc].elem.count, (long)nbElems,
pData->desc.desc[pos_desc].elem.extent );
PUSH_STACK( pStack, stack_pos, nbElems, pData->desc.desc[pos_desc].elem.count,
totalDisp, pos_desc + pData->desc.desc[pos_desc].elem.disp );
pos_desc++;
DDT_DUMP_STACK( pStack, stack_pos, pData->desc.desc, "advance loops" );
}
totalDisp = pStack->disp; /* update the displacement */
continue;
}
while( pData->desc.desc[pos_desc].flags & DT_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */
while( pData->desc.desc[pos_desc].elem.common.flags & DT_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */
/* now here we have a basic datatype */
type = pData->desc.desc[pos_desc].type;
if( (pData->desc.desc[pos_desc].flags & DT_FLAG_CONTIGUOUS) &&
(lastDisp + lastLength) == (totalDisp + pData->desc.desc[pos_desc].disp) ) {
lastLength += pData->desc.desc[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
type = pData->desc.desc[pos_desc].elem.common.type;
if( (pData->desc.desc[pos_desc].elem.common.flags & DT_FLAG_CONTIGUOUS) &&
(lastDisp + lastLength) == (totalDisp + pData->desc.desc[pos_desc].elem.disp) ) {
lastLength += pData->desc.desc[pos_desc].elem.count * ompi_ddt_basicDatatypes[type]->size;
lastExtent = 1;
} else {
if( lastLength != 0 )
SAVE_DESC( pElemDesc, lastDisp, lastLength, lastExtent );
lastDisp = totalDisp + pData->desc.desc[pos_desc].disp;
lastLength = pData->desc.desc[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
lastDisp = totalDisp + pData->desc.desc[pos_desc].elem.disp;
lastLength = pData->desc.desc[pos_desc].elem.count * ompi_ddt_basicDatatypes[type]->size;
lastExtent = 1;
}
pos_desc++; /* advance to the next data */
@ -183,122 +182,124 @@ int32_t ompi_ddt_optimize_short( ompi_datatype_t* pData, int32_t count,
#if defined(COMPILE_USELSS_CODE)
static int ompi_ddt_unroll( ompi_datatype_t* pData, int count )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int type; /* type at current position */
int i; /* index for basic elements with extent */
int stack_pos = 0; /* position on the stack */
long lastDisp = 0, lastLength = 0;
char* pDestBuf;
int bConverted = 0, __index = 0, __sofar = 0;
dt_elem_desc_t* pElems;
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int type; /* type at current position */
int i; /* index for basic elements with extent */
int stack_pos = 0; /* position on the stack */
long lastDisp = 0, lastLength = 0;
char* pDestBuf;
int bConverted = 0, __index = 0, __sofar = 0;
dt_elem_desc_t* pElems;
pDestBuf = NULL;
pDestBuf = NULL;
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
long extent = pData->ub - pData->lb;
char* pSrc = (char*)pData->true_lb;
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
long extent = pData->ub - pData->lb;
char* pSrc = (char*)pData->true_lb;
type = count * pData->size;
if( pData->size == extent /* true extent at this point */ ) {
/* we can do it with just one memcpy */
PRINT_MEMCPY( pDestBuf, pSrc, pData->size * count );
bConverted += (pData->size * count);
} else {
char* pSrcBuf = (char*)pData->true_lb;
long extent = pData->ub - pData->lb;
for( pos_desc = 0; pos_desc < count; pos_desc++ ) {
PRINT_MEMCPY( pDestBuf, pSrcBuf, pData->size );
pSrcBuf += extent;
pDestBuf += pData->size;
}
bConverted += type;
}
return (bConverted == (pData->size * count));
}
pStack = alloca( sizeof(dt_stack_t) * pData->btypes[DT_LOOP] );
pStack->count = count;
pStack->index = -1;
pStack->disp = 0;
pos_desc = 0;
type = count * pData->size;
if( pData->size == extent /* true extent at this point */ ) {
/* we can do it with just one memcpy */
PRINT_MEMCPY( pDestBuf, pSrc, pData->size * count );
bConverted += (pData->size * count);
} else {
char* pSrcBuf = (char*)pData->true_lb;
long extent = pData->ub - pData->lb;
for( pos_desc = 0; pos_desc < count; pos_desc++ ) {
PRINT_MEMCPY( pDestBuf, pSrcBuf, pData->size );
pSrcBuf += extent;
pDestBuf += pData->size;
}
bConverted += type;
}
return (bConverted == (pData->size * count));
}
pStack = alloca( sizeof(dt_stack_t) * pData->btypes[DT_LOOP] );
pStack->count = count;
pStack->index = -1;
pStack->disp = 0;
pos_desc = 0;
if( pData->opt_desc.desc != NULL ) {
pElems = pData->opt_desc.desc;
pStack->end_loop = pData->opt_desc.used;
} else {
pElems = pData->desc.desc;
pStack->end_loop = pData->desc.used;
}
if( pData->opt_desc.desc != NULL ) {
pElems = pData->opt_desc.desc;
pStack->end_loop = pData->opt_desc.used;
} else {
pElems = pData->desc.desc;
pStack->end_loop = pData->desc.used;
}
DDT_DUMP_STACK( pStack, stack_pos, pElems, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n", pStack->index, pStack->count );
DDT_DUMP_STACK( pStack, stack_pos, pElems, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n", pStack->index, pStack->count );
while( pos_desc >= 0 ) {
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
pStack--;
if( --stack_pos == -1 ) break;
} else {
pos_desc = pStack->index;
if( pos_desc == -1 )
pStack->disp += (pData->ub - pData->lb);
else
pStack->disp += pElems[pos_desc].extent;
}
pos_desc++;
continue;
}
if( pElems[pos_desc].type == DT_LOOP ) {
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) {
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength + pLast->extent );
lastDisp = pStack->disp + pElems[pos_desc+1].disp + pLast->extent;
i = 1;
while( pos_desc >= 0 ) {
if( DT_END_LOOP == pElems[pos_desc].type ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
pStack--;
if( --stack_pos == -1 ) break;
} else {
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
lastDisp = pStack->disp + pElems[pos_desc + 1].disp;
i = 0;
pos_desc = pStack->index;
if( pos_desc == -1 ) {
pStack->disp += (pData->ub - pData->lb);
} else {
assert( DT_LOOP == pElems[pos_desc].elem.common.type );
pStack->disp += pElems[pos_desc].loop.extent;
}
}
lastLength = pLast->extent;
for( ; i < (pElems[pos_desc].count - 1); i++ ) {
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
pDestBuf += pLast->extent;
lastDisp += pElems[pos_desc].extent;
pos_desc++;
continue;
}
if( DT_LOOP == pElems[pos_desc].type ) {
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) {
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength + pLast->extent );
lastDisp = pStack->disp + pElems[pos_desc+1].disp + pLast->extent;
i = 1;
} else {
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
lastDisp = pStack->disp + pElems[pos_desc + 1].disp;
i = 0;
}
lastLength = pLast->extent;
for( ; i < (pElems[pos_desc].count - 1); i++ ) {
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
pDestBuf += pLast->extent;
lastDisp += pElems[pos_desc].extent;
}
pos_desc += pElems[pos_desc].disp + 1;
goto next_loop;
} else {
do {
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pos_desc++;
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
}
pos_desc += pElems[pos_desc].disp + 1;
goto next_loop;
} else {
do {
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pos_desc++;
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
}
}
/* now here we have a basic datatype */
type = pElems[pos_desc].type;
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
lastLength += pElems[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
} else {
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
pDestBuf += lastLength;
bConverted += lastLength;
lastDisp = pStack->disp + pElems[pos_desc].disp;
lastLength = pElems[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
}
pos_desc++; /* advance to the next data */
}
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
return OMPI_SUCCESS;
}
/* now here we have a basic datatype */
type = pElems[pos_desc].type;
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
lastLength += pElems[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
} else {
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
pDestBuf += lastLength;
bConverted += lastLength;
lastDisp = pStack->disp + pElems[pos_desc].disp;
lastLength = pElems[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
}
pos_desc++; /* advance to the next data */
}
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
return OMPI_SUCCESS;
}
#endif /* COMPILE_USELSS_CODE */
int32_t ompi_ddt_commit( ompi_datatype_t** data )
{
ompi_datatype_t* pData = *data;
dt_endloop_desc_t* pLast = (dt_endloop_desc_t*)&(pData->desc.desc[pData->desc.used]);
ddt_endloop_desc_t* pLast = &(pData->desc.desc[pData->desc.used].end_loop);
if( pData->flags & DT_FLAG_COMMITED ) return OMPI_SUCCESS;
pData->flags |= DT_FLAG_COMMITED;
@ -306,8 +307,8 @@ int32_t ompi_ddt_commit( ompi_datatype_t** data )
/* let's add a fake element at the end just to avoid useless comparisons
* in pack/unpack functions.
*/
pLast->type = DT_END_LOOP;
pLast->flags = 0;
pLast->common.type = DT_END_LOOP;
pLast->common.flags = 0;
pLast->items = pData->desc.used;
pLast->total_extent = pData->ub - pData->lb;
pLast->size = pData->size;
@ -318,9 +319,9 @@ int32_t ompi_ddt_commit( ompi_datatype_t** data )
/* let's add a fake element at the end just to avoid useless comparisons
* in pack/unpack functions.
*/
pLast = (dt_endloop_desc_t*)&(pData->opt_desc.desc[pData->opt_desc.used]);
pLast->type = DT_END_LOOP;
pLast->flags = 0;
pLast = &(pData->opt_desc.desc[pData->opt_desc.used].end_loop);
pLast->common.type = DT_END_LOOP;
pLast->common.flags = 0;
pLast->items = pData->opt_desc.used;
pLast->total_extent = pData->ub - pData->lb;
pLast->size = pData->size;

Просмотреть файл

@ -79,7 +79,7 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
pInput = iov[iov_count].iov_base;
iCount = iov[iov_count].iov_len;
while( 1 ) {
if( pElem[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( DT_END_LOOP == pElem[pos_desc].elem.common.type ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
if( pConvertor->stack_pos == 0 )
goto complete_loop; /* completed */
@ -91,47 +91,48 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
if( pStack->index == -1 ) {
pStack->disp += (pData->ub - pData->lb);
} else {
pStack->disp += pElem[pStack->index].extent;
assert( DT_LOOP == pElem[pStack->index].elem.common.type );
pStack->disp += pElem[pStack->index].loop.extent;
}
}
count_desc = pElem[pos_desc].count;
disp_desc = pElem[pos_desc].disp;
count_desc = pElem[pos_desc].elem.count;
disp_desc = pElem[pos_desc].elem.disp;
}
if( pElem[pos_desc].type == DT_LOOP ) {
if( DT_LOOP == pElem[pos_desc].elem.common.type ) {
do {
PUSH_STACK( pStack, pConvertor->stack_pos,
pos_desc, pElem[pos_desc].count,
pStack->disp, pos_desc + pElem[pos_desc].disp + 1);
pos_desc, pElem[pos_desc].elem.count,
pStack->disp, pos_desc + pElem[pos_desc].elem.disp + 1);
pos_desc++;
} while( pElem[pos_desc].type == DT_LOOP ); /* let's start another loop */
} while( DT_LOOP == pElem[pos_desc].elem.common.type ); /* let's start another loop */
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loops" );
/* update the current state */
count_desc = pElem[pos_desc].count;
disp_desc = pElem[pos_desc].disp;
count_desc = pElem[pos_desc].elem.count;
disp_desc = pElem[pos_desc].elem.disp;
continue;
}
while( pElem[pos_desc].flags & DT_FLAG_DATA ) {
while( pElem[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
/* now here we have a basic datatype */
type = pElem[pos_desc].type;
type = pElem[pos_desc].elem.common.type;
rc = pConvertor->pFunctions[type]( count_desc,
pOutput + pStack->disp + disp_desc,
iCount, pElem[pos_desc].extent,
pInput, iCount, ompi_ddt_basicDatatypes[type]->size );
advance = rc * ompi_ddt_basicDatatypes[type]->size;
iCount, pElem[pos_desc].elem.extent,
pInput, iCount, BASIC_DDT_FROM_ELEM(pElem[pos_desc])->size );
advance = rc * BASIC_DDT_FROM_ELEM(pElem[pos_desc])->size;
iCount -= advance; /* decrease the available space in the buffer */
pInput += advance; /* increase the pointer to the buffer */
bConverted += advance;
if( rc != count_desc ) {
/* not all data has been converted. Keep the state */
count_desc -= rc;
disp_desc += rc * pElem[pos_desc].extent;
disp_desc += rc * pElem[pos_desc].elem.extent;
if( iCount != 0 )
printf( "pack there is still room in the input buffer %d bytes\n", iCount );
goto complete_loop;
}
pos_desc++; /* advance to the next data */
count_desc = pElem[pos_desc].count;
disp_desc = pElem[pos_desc].disp;
count_desc = pElem[pos_desc].elem.count;
disp_desc = pElem[pos_desc].elem.disp;
if( iCount == 0 ) goto complete_loop; /* break if there is no more data in the buffer */
}
}
@ -188,7 +189,7 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
pConv->stack_pos--;
while( 1 ) {
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
if( pConv->stack_pos == 0 ) { /* finish everything */
last_count = 0;
@ -203,35 +204,36 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
pStack->disp += (pData->ub - pData->lb);
pos_desc = 0;
} else {
pStack->disp += pElems[pos_desc].extent;
assert( DT_LOOP == pElems[pStack->index].elem.common.type );
pStack->disp += pElems[pStack->index].loop.extent;
pos_desc = pStack->index + 1;
}
}
last_count = pElems[pos_desc].count;
last_count = pElems[pos_desc].elem.count;
last_blength = last_count;
lastDisp = pStack->disp + pElems[pos_desc].disp;
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
continue;
}
while( pElems[pos_desc].type == DT_LOOP ) {
while( DT_LOOP == pElems[pos_desc].elem.common.type ) {
int stop_in_loop = 0;
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
dt_elem_desc_t* pLast = &(pElems[pos_desc + pElems[pos_desc].disp]);
if( (pLast->extent * last_count) > (int)space ) {
if( pElems[pos_desc].elem.common.flags & DT_FLAG_CONTIGUOUS ) {
ddt_endloop_desc_t* end_loop = &(pElems[pos_desc + pElems[pos_desc].loop.items].end_loop);
if( (end_loop->size * last_count) > space ) {
stop_in_loop = last_count;
last_count = space / pLast->extent;
last_count = space / end_loop->size;
}
for( i = 0; i < last_count; i++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, pLast->extent,
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, end_loop->size,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, pLast->extent );
pDestBuf += pLast->extent; /* size of the contiguous data */
lastDisp += pElems[pos_desc].extent;
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, end_loop->size );
pDestBuf += end_loop->size; /* size of the contiguous data */
lastDisp += pElems[pos_desc].loop.extent;
}
space -= (pLast->extent * last_count);
bConverted += (pLast->extent * last_count);
space -= (end_loop->size * last_count);
bConverted += (end_loop->size * last_count);
if( stop_in_loop == 0 ) {
pos_desc += pElems[pos_desc].disp + 1;
last_count = pElems[pos_desc].count;
pos_desc += pElems[pos_desc].loop.items + 1;
last_count = pElems[pos_desc].elem.count;
continue;
}
/* mark some of the iterations as completed */
@ -240,18 +242,18 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
/* Save the stack with the correct last_count value. */
}
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pStack->disp, pos_desc + pElems[pos_desc].loop.items );
pos_desc++;
last_count = pElems[pos_desc].count;
last_count = pElems[pos_desc].elem.count;
}
/* now here we have a basic datatype */
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
/* do we have enough space in the buffer ? */
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
last_blength = last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
if( space < last_blength ) {
last_blength = last_count;
last_count = space / ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
space -= (last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size);
last_count = space / BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
space -= (last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size);
last_blength -= last_count;
goto end_loop; /* or break whatever but go out of this while */
}
@ -262,8 +264,8 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
space -= last_blength;
pDestBuf += last_blength;
pos_desc++; /* advance to the next data */
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
last_count = pElems[pos_desc].elem.count;
}
}
last_count = 0; /* complete the data */
@ -323,7 +325,7 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
/* retrieve the context of the last call */
pos_desc = pStack->index;
last_count = pStack->count;
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
last_blength = last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
lastDisp = pStack->disp;
savePos = (char*)pConv->pBaseBuf + pStack->disp;
saveLength = 0;
@ -334,7 +336,7 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
space_on_iovec = iov[0].iov_len;
while( pos_desc >= 0 ) {
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
if( pConv->stack_pos == 0 ) { /* finish everything */
if( saveLength != 0 ) {
@ -380,26 +382,28 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
pStack--;
} else {
pos_desc = pStack->index; /* DT_LOOP index */
if( pos_desc == -1 )
if( pos_desc == -1 ) {
pStack->disp += (pData->ub - pData->lb);
else
pStack->disp += pElems[pos_desc].extent;
} else {
assert( DT_LOOP == pElems[pos_desc].elem.common.type );
pStack->disp += pElems[pos_desc].loop.extent;
}
}
pos_desc++; /* go to the next element */
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
last_count = pElems[pos_desc].elem.count;
last_blength = last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
continue; /* next loop */
}
while( pElems[pos_desc].type == DT_LOOP ) {
while( DT_LOOP == pElems[pos_desc].elem.common.type ) {
int stop_in_loop = 0;
/* If the loop container is contiguous then we can do some
* optimizations.
*/
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
if( pElems[pos_desc].elem.common.flags & DT_FLAG_CONTIGUOUS ) {
/* point to the end of loop element */
dt_elem_desc_t* pLast = &(pElems[pos_desc + pElems[pos_desc].disp]);
ddt_endloop_desc_t* end_loop = &(pElems[pos_desc + pElems[pos_desc].loop.items].end_loop);
if( iov[iov_pos].iov_base == NULL ) {
iov[iov_pos].iov_base = pConv->memAlloc_fn( &(iov[iov_pos].iov_len) );
space_on_iovec = iov[iov_pos].iov_len;
@ -407,32 +411,32 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
(*freeAfter) |= (1 << iov_pos);
}
/* compute the maximum amount of data to be packed */
if( (pLast->extent * last_count) > (int)space_on_iovec ) {
if( (end_loop->size * last_count) > space_on_iovec ) {
stop_in_loop = last_count;
last_count = space_on_iovec / pLast->extent;
last_count = space_on_iovec / end_loop->size;
}
/* Now let's do it */
for( i = 0; i < last_count; i++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, pLast->extent,
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, end_loop->size,
pConv->pBaseBuf, pData, pConv->count );
DO_DEBUG (ompi_output( 0, "2. memcpy( %p, %p, %ld )\n", pDestBuf, pConv->pBaseBuf + lastDisp,
pLast->extent ); );
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, pLast->extent );
lastDisp += pElems[pos_desc].extent;
pDestBuf += pLast->extent;
end_loop->size ); );
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, end_loop->size );
lastDisp += pElems[pos_desc].loop.extent;
pDestBuf += end_loop->size;
}
DO_DEBUG( ompi_output( 0, "\t\tbConverted %ld space %ld pConv->bConverted %ld\n",
bConverted, space_on_iovec, pConv->bConverted ); );
i = pLast->extent * last_count; /* temporary value */
i = end_loop->size * last_count; /* temporary value */
space_on_iovec -= i;
space -= i;
bConverted += i;
if( stop_in_loop == 0 ) { /* did I stop before the end */
/* the pElems point to the LOOP struct */
pos_desc += pElems[pos_desc].disp + 1;
last_count = pElems[pos_desc].count;
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
lastDisp = pStack->disp + pElems[pos_desc].disp;
/* the pElems point to the LOOP struct in the beginning */
pos_desc += pElems[pos_desc].loop.items + 1;
last_count = pElems[pos_desc].elem.count;
last_blength = last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
continue;
}
/* mark some of the iterations as completed */
@ -441,14 +445,14 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
/* Save the stack with the correct last_count value. */
}
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pStack->disp, pos_desc + pElems[pos_desc].loop.items );
pos_desc++;
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
last_count = pElems[pos_desc].elem.count;
last_blength = last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
}
/* now here we have a basic datatype */
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
/* first let's see if it's contiguous with the previous chunk of memory and
* we still have enough room in the buffer...
*/
@ -539,9 +543,9 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
if( saveLength > space ) /* this will be the last element copied this time */
continue;
pos_desc++; /* advance to the next data */
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
last_count = pElems[pos_desc].elem.count;
last_blength = last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
}
}
end_loop:
@ -580,13 +584,12 @@ ompi_convertor_pack_no_conv_contig( ompi_convertor_t* pConv,
*/
pSrc = pConv->pBaseBuf + pStack[0].disp + pStack[1].disp;
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
if( iov[iov_count].iov_len > length )
iov[iov_count].iov_len = length;
if( iov[iov_count].iov_base == NULL ) {
iov[iov_count].iov_base = pSrc;
if( iov[iov_count].iov_len > length )
iov[iov_count].iov_len = length;
} else {
/* contiguous data just memcpy the smallest data in the user buffer */
iov[iov_count].iov_len = IMIN( iov[iov_count].iov_len, length );
OMPI_DDT_SAFEGUARD_POINTER( pSrc, iov[iov_count].iov_len,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( iov[iov_count].iov_base, pSrc, iov[iov_count].iov_len);
@ -691,13 +694,12 @@ ompi_convertor_pack_no_conv_contig_with_gaps( ompi_convertor_t* pConv,
}
if( (long)pData->size == extent ) { /* that really contiguous */
/* contiguous data just memcpy the smallest data in the user buffer */
if( (pConv->bConverted + iov[iov_count].iov_len) > length )
iov[iov_count].iov_len = length - pConv->bConverted;
if( iov[iov_count].iov_base == NULL ) {
iov[iov_count].iov_base = pSrc;
if( (pConv->bConverted + iov[iov_count].iov_len) > length )
iov[iov_count].iov_len = length - pConv->bConverted;
} else {
/* contiguous data just memcpy the smallest data in the user buffer */
iov[iov_count].iov_len = IMIN( iov[iov_count].iov_len, length );
OMPI_DDT_SAFEGUARD_POINTER( pSrc, iov[iov_count].iov_len,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( iov[iov_count].iov_base, pSrc, iov[iov_count].iov_len);

Просмотреть файл

@ -32,9 +32,9 @@ void ompi_ddt_dump_stack( const dt_stack_t* pStack, int stack_pos, const dt_elem
pStack->count, pStack->disp, pStack->end_loop );
if( pStack->index != -1 )
printf( "[desc count %d disp %ld extent %d]\n",
pDesc[pStack->index].count,
pDesc[pStack->index].disp,
pDesc[pStack->index].extent );
pDesc[pStack->index].elem.count,
pDesc[pStack->index].elem.disp,
pDesc[pStack->index].elem.extent );
else
printf( "\n" );
}
@ -91,7 +91,7 @@ static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
pInput = iov[iov_count].iov_base;
iCount = iov[iov_count].iov_len;
while( 1 ) {
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
if( pConvertor->stack_pos == 0 )
goto save_and_return; /* completed */
@ -99,33 +99,35 @@ static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
pStack--;
}
if( pStack->index == -1 )
if( pStack->index == -1 ) {
pStack->disp += (pConvertor->pDesc->ub - pConvertor->pDesc->lb);
else
pStack->disp += pElems[pStack->index].extent;
} else {
assert( DT_LOOP == pElems[pStack->index].elem.common.type );
pStack->disp += pElems[pStack->index].loop.extent;
}
pos_desc = pStack->index + 1;
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
count_desc = pElems[pos_desc].elem.count;
disp_desc = pElems[pos_desc].elem.disp;
}
if( pElems[pos_desc].type == DT_LOOP ) {
if( DT_LOOP == pElems[pos_desc].elem.common.type ) {
do {
PUSH_STACK( pStack, pConvertor->stack_pos,
pos_desc, pElems[pos_desc].count,
pStack->disp, pos_desc + pElems[pos_desc].disp + 1 );
pos_desc, pElems[pos_desc].loop.loops,
pStack->disp, pos_desc + pElems[pos_desc].loop.items + 1 );
pos_desc++;
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
} while( DT_LOOP == pElems[pos_desc].loop.common.type ); /* let's start another loop */
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "advance loops" );
/* update the current state */
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
count_desc = pElems[pos_desc].elem.count;
disp_desc = pElems[pos_desc].elem.disp;
}
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
/* now here we have a basic datatype */
type = pElems[pos_desc].type;
type = pElems[pos_desc].elem.common.type;
rc = pConvertor->pFunctions[type]( count_desc,
pInput, iCount, ompi_ddt_basicDatatypes[type]->size,
pConvertor->pBaseBuf + pStack->disp + disp_desc,
oCount, pElems[pos_desc].extent );
oCount, pElems[pos_desc].elem.extent );
advance = rc * ompi_ddt_basicDatatypes[type]->size;
iCount -= advance; /* decrease the available space in the buffer */
pInput += advance; /* increase the pointer to the buffer */
@ -133,14 +135,14 @@ static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
if( rc != count_desc ) {
/* not all data has been converted. Keep the state */
count_desc -= rc;
disp_desc += rc * pElems[pos_desc].extent;
disp_desc += rc * pElems[pos_desc].elem.extent;
if( iCount != 0 )
printf( "unpack there is still room in the input buffer %d bytes\n", iCount );
goto save_and_return;
}
pos_desc++; /* advance to the next data */
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
count_desc = pElems[pos_desc].elem.count;
disp_desc = pElems[pos_desc].elem.disp;
if( iCount == 0 )
goto save_and_return; /* break if there is no more data in the buffer */
}
@ -194,7 +196,7 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
pConv->stack_pos--;
while( 1 ) { /* loop forever. The exit condition is detected inside the while loop */
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
if( pConv->stack_pos == 0 ) {
last_blength = 0; /* nothing to copy anymore */
@ -204,37 +206,39 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
pConv->stack_pos--;
pos_desc++;
} else {
if( pStack->index == -1 )
if( pStack->index == -1 ) {
pStack->disp += (pData->ub - pData->lb);
else
pStack->disp += pElems[pStack->index].extent;
} else {
assert( DT_LOOP == pElems[pStack->index].elem.common.type );
pStack->disp += pElems[pStack->index].loop.extent;
}
pos_desc = pStack->index + 1;
}
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
last_count = pElems[pos_desc].elem.count;
continue;
}
while( pElems[pos_desc].type == DT_LOOP ) {
while( DT_LOOP == pElems[pos_desc].elem.common.type ) {
int stop_in_loop = 0;
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
dt_elem_desc_t* pLast = &(pElems[pos_desc + pElems[pos_desc].disp]);
last_count = pElems[pos_desc].count;
if( (pLast->extent * last_count) > space ) {
if( pElems[pos_desc].loop.common.flags & DT_FLAG_CONTIGUOUS ) {
ddt_endloop_desc_t* end_loop = &(pElems[pos_desc + pElems[pos_desc].loop.items].end_loop);
last_count = pElems[pos_desc].loop.loops;
if( (end_loop->size * last_count) > space ) {
stop_in_loop = last_count;
last_count = space / pLast->extent;
last_count = space / end_loop->size;
}
for( i = 0; i < last_count; i++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, pLast->extent,
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, end_loop->size,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, pLast->extent );
pSrcBuf += pLast->extent;
lastDisp += pElems[pos_desc].extent;
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, end_loop->size );
pSrcBuf += end_loop->size;
lastDisp += pElems[pos_desc].loop.extent;
}
space -= (pLast->extent * last_count);
bConverted += (pLast->extent * last_count);
space -= (end_loop->size * last_count);
bConverted += (end_loop->size * last_count);
if( stop_in_loop == 0 ) {
pos_desc += pElems[pos_desc].disp + 1;
last_count = pElems[pos_desc].count;
pos_desc += pElems[pos_desc].loop.items + 1;
last_count = pElems[pos_desc].elem.count;
continue;
}
last_count = stop_in_loop - last_count;
@ -242,20 +246,21 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
/* Save the stack with the correct last_count value. */
}
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pStack->disp, pos_desc + pElems[pos_desc].loop.items );
pos_desc++;
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
last_count = pElems[pos_desc].elem.count;
}
/* now here we have a basic datatype */
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
const ompi_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]);
/* do we have enough space in the buffer ? */
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
last_blength = last_count * basic_type->size;
if( pElems[pos_desc].elem.common.flags & DT_FLAG_CONTIGUOUS ) {
if( space < last_blength ) {
last_blength = space / ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
last_blength = space / basic_type->size;
last_count -= last_blength;
last_blength *= ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
last_blength *= basic_type->size;
space -= last_blength;
goto end_loop; /* or break whatever but go out of this while */
}
@ -268,19 +273,19 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
} else {
uint32_t i;
last_blength = ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
last_blength = basic_type->size;
for( i = 0; i < last_count; i++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, last_blength,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, last_blength );
lastDisp += pElems[pos_desc].extent;
pSrcBuf += ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
lastDisp += pElems[pos_desc].elem.extent;
pSrcBuf += basic_type->size;
}
bConverted += ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size * last_count;
bConverted += basic_type->size * last_count;
}
pos_desc++; /* advance to the next data */
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
last_count = pElems[pos_desc].elem.count;
}
}
end_loop:
@ -660,7 +665,6 @@ int32_t ompi_ddt_get_element_count( const ompi_datatype_t* datatype, int32_t iSi
{
dt_stack_t* pStack; /* pointer to the position on the stack */
uint32_t pos_desc; /* actual position in the description of the derived datatype */
int type; /* type at current position */
int rc, nbElems = 0;
int stack_pos = 0;
@ -677,7 +681,7 @@ int32_t ompi_ddt_get_element_count( const ompi_datatype_t* datatype, int32_t iSi
pos_desc = 0;
while( 1 ) { /* loop forever the exit conditionis on the last section */
if( datatype->desc.desc[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( DT_END_LOOP == datatype->desc.desc[pos_desc].elem.common.type ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
stack_pos--;
pStack--;
@ -687,31 +691,32 @@ int32_t ompi_ddt_get_element_count( const ompi_datatype_t* datatype, int32_t iSi
if( pStack->index == -1 ) {
pStack->disp += (datatype->ub - datatype->lb);
} else {
pStack->disp += datatype->desc.desc[pos_desc].extent;
assert( DT_LOOP == datatype->desc.desc[pos_desc].elem.common.type );
pStack->disp += datatype->desc.desc[pos_desc].loop.extent;
}
pos_desc = pStack->index + 1;
continue;
}
if( datatype->desc.desc[pos_desc].type == DT_LOOP ) {
if( DT_LOOP == datatype->desc.desc[pos_desc].elem.common.type ) {
do {
PUSH_STACK( pStack, stack_pos, pos_desc, datatype->desc.desc[pos_desc].count,
0, pos_desc + datatype->desc.desc[pos_desc].disp );
PUSH_STACK( pStack, stack_pos, pos_desc, datatype->desc.desc[pos_desc].loop.loops,
0, pos_desc + datatype->desc.desc[pos_desc].loop.items );
pos_desc++;
} while( datatype->desc.desc[pos_desc].type == DT_LOOP ); /* let's start another loop */
} while( DT_LOOP == datatype->desc.desc[pos_desc].elem.common.type ); /* let's start another loop */
DDT_DUMP_STACK( pStack, stack_pos, datatype->desc.desc, "advance loops" );
continue;
}
while( datatype->desc.desc[pos_desc].flags & DT_FLAG_DATA ) {
while( datatype->desc.desc[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
/* now here we have a basic datatype */
type = datatype->desc.desc[pos_desc].type;
rc = datatype->desc.desc[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
const ompi_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(datatype->desc.desc[pos_desc]);
rc = datatype->desc.desc[pos_desc].elem.count * basic_type->size;
if( rc >= iSize ) {
rc = iSize / ompi_ddt_basicDatatypes[type]->size;
rc = iSize / basic_type->size;
nbElems += rc;
iSize -= rc * ompi_ddt_basicDatatypes[type]->size;
iSize -= rc * basic_type->size;
return (iSize == 0 ? nbElems : -1);
}
nbElems += datatype->desc.desc[pos_desc].count;
nbElems += datatype->desc.desc[pos_desc].elem.count;
iSize -= rc;
pos_desc++; /* advance to the next data */
}
@ -783,7 +788,7 @@ int32_t ompi_ddt_copy_content_same_ddt( const ompi_datatype_t* datatype, int32_t
}
while( 1 ) {
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
pStack--;
if( --stack_pos == -1 ) goto end_loop;
@ -791,31 +796,33 @@ int32_t ompi_ddt_copy_content_same_ddt( const ompi_datatype_t* datatype, int32_t
pos_desc++;
} else {
DDT_DUMP_STACK( pStack, stack_pos, pElems, "decrease loop count" );
if( pStack->index == -1 )
if( pStack->index == -1 ) {
pStack->disp += (datatype->ub - datatype->lb);
else
pStack->disp += pElems[pStack->index].extent;
} else {
assert( DT_LOOP == pElems[pStack->index].elem.common.type );
pStack->disp += pElems[pStack->index].loop.extent;
}
pos_desc = pStack->index + 1;
}
}
if( pElems[pos_desc].type == DT_LOOP ) {
if( DT_LOOP == pElems[pos_desc].elem.common.type ) {
do {
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].loop.loops,
pStack->disp, pos_desc + pElems[pos_desc].loop.items );
pos_desc++;
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
} while( DT_LOOP == pElems[pos_desc].elem.common.type ); /* let's start another loop */
DDT_DUMP_STACK( pStack, stack_pos, pElems, "advance loops" );
}
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
/* now here we have a basic datatype */
if( (lastDisp + lastLength) != (pStack->disp + pElems[pos_desc].disp) ) {
if( (lastDisp + lastLength) != (pStack->disp + pElems[pos_desc].elem.disp) ) {
OMPI_DDT_SAFEGUARD_POINTER( pDestBuf + lastDisp, lastLength,
pDestBuf, datatype, count );
MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength );
lastDisp = pStack->disp + pElems[pos_desc].disp;
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
lastLength = 0;
}
lastLength += pElems[pos_desc].count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
lastLength += pElems[pos_desc].elem.count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
pos_desc++; /* advance to the next data */
}
}

Просмотреть файл

@ -45,10 +45,9 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int type, lastLength = 0;
int lastLength = 0, loop_length;
ompi_datatype_t* pData = pConvertor->pDesc;
int* remoteLength;
int loop_length;
int resting_place = starting_point;
dt_elem_desc_t* pElems;
size_t remote_size;
@ -84,7 +83,7 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
if( pConvertor->flags & CONVERTOR_HOMOGENEOUS ) {
loop_length = GET_FIRST_NON_LOOP( pElems );
pStack->disp = pElems[loop_length].disp;
pStack->disp = pElems[loop_length].elem.disp;
/* Special case for contiguous datatypes */
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
@ -94,8 +93,8 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
pStack->count -= cnt;
cnt = starting_point - cnt * pData->size; /* number of bytes after the loop */
pStack[1].index = 0;
pStack[1].count = (pElems[loop_length].count *
ompi_ddt_basicDatatypes[pElems[loop_length].type]->size) - cnt;
pStack[1].count = (pElems[loop_length].elem.count *
ompi_ddt_basicDatatypes[pElems[loop_length].elem.common.type]->size) - cnt;
pStack[1].end_loop = pStack->end_loop;
if( (long)pData->size == extent ) { /* all elements are contiguous */
@ -119,7 +118,7 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
pConvertor->bConverted += (remote_size * count);
loop_length = GET_FIRST_NON_LOOP( pElems );
pStack->disp = count * (pData->ub - pData->lb) + pElems[loop_length].disp;
pStack->disp = count * (pData->ub - pData->lb) + pElems[loop_length].elem.disp;
pos_desc = 0;
remoteLength = (int*)alloca( sizeof(int) * (pConvertor->pDesc->btypes[DT_LOOP] + 1));
@ -131,8 +130,8 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
next_loop:
loop_length = remoteLength[pConvertor->stack_pos];
while( pos_desc < pConvertor->pStack[0].end_loop ) { /* guard against a starting_point that is larger than the total size */
if( pElems->type == DT_END_LOOP ) { /* end of the current loop */
dt_endloop_desc_t* end_loop = (dt_endloop_desc_t*)pElems;
if( DT_END_LOOP == pElems->elem.common.type ) { /* end of the current loop */
ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)pElems;
long extent;
pStack->count--;
@ -147,7 +146,8 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
if( pStack->index == -1 ) {
extent = pData->ub - pData->lb;
} else {
extent = ((dt_loop_desc_t*)(pElems - end_loop->items + 1))->extent;
assert( DT_LOOP == pElems[1 - end_loop->items].loop.common.type );
extent = ((ddt_loop_desc_t*)(pElems - end_loop->items + 1))->extent;
}
pStack->count -= cnt;
resting_place -= cnt * loop_length;
@ -174,27 +174,27 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
pElems++;
goto next_loop;
}
if( pElems->type == DT_LOOP ) {
if( DT_LOOP == pElems->elem.common.type ) {
remoteLength[pConvertor->stack_pos] += loop_length;
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
pData->desc.desc[pos_desc].count,
pStack->disp, pos_desc + pElems->disp );
pElems->loop.loops, /*pData->desc.desc[pos_desc].loop.loops,*/
pStack->disp, pos_desc + pElems->loop.items );
remoteLength[pConvertor->stack_pos] = 0;
pos_desc++;
pElems++;
loop_length = 0; /* starting a new loop */
}
while( pElems->flags & DT_FLAG_DATA ) {
while( pElems->elem.common.flags & DT_FLAG_DATA ) {
/* now here we have a basic datatype */
type = pElems->type;
lastLength = pElems->count * ompi_ddt_basicDatatypes[type]->size;
const ompi_datatype_t* basic_type = BASIC_DDT_FROM_ELEM( (*pElems) );
lastLength = pElems->elem.count * basic_type->size;
if( resting_place < lastLength ) {
int cnt = resting_place / ompi_ddt_basicDatatypes[type]->size;
loop_length += cnt * ompi_ddt_basicDatatypes[type]->size;
resting_place -= (cnt * ompi_ddt_basicDatatypes[type]->size);
int cnt = resting_place / basic_type->size;
loop_length += cnt * basic_type->size;
resting_place -= (cnt * basic_type->size);
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
pElems->count - cnt,
pStack->disp + pElems->disp + cnt * pElems->extent,
pElems->elem.count - cnt,
pStack->disp + pElems->elem.disp + cnt * pElems->elem.extent,
pos_desc );
pConvertor->bConverted = starting_point - resting_place;
return OMPI_SUCCESS;