1
1

Reorder the datatype definition and use a storage class corresponding to the kind of data it's supposed to hold.

Change the stack usage to make things faster:
 - the first position of the stack is always used by the count argument
 - the second is initialized to the first position in the elements list.
 - when the ddt engine advances, the last stack position holds the information about the last processed element (the one from which we have to start the next time).

  Modify the pack/unpack related functions to reflect the changes in the stack usage. At the same time I decreased the amount of code (yup yup) and reordered it a little bit. I get some speedup on my laptop ...

This commit was SVN r3221.
Этот коммит содержится в:
George Bosilca 2004-10-19 23:36:31 +00:00
родитель faef97cf25
Коммит 9ba3ae9c9d
10 изменённых файлов: 599 добавлений и 508 удалений

Просмотреть файл

@ -66,30 +66,30 @@ typedef struct __dt_struct_desc {
*/
typedef struct ompi_datatype_t {
ompi_object_t super; /**< basic superclass */
u_int32_t size; /**< total size in bytes of the memory used by the data if
* the data is put on a contiguous buffer */
long true_lb;
long true_ub; /**< the true ub of the data without user defined lb and ub */
u_int32_t align; /**< data should be aligned to */
long lb; /**< lower bound in memory */
long ub; /**< upper bound in memory */
u_int16_t flags; /**< the flags */
u_int16_t id; /**< data id, normally the index in the data array. */
u_int32_t nbElems; /**< total number of elements inside the datatype */
u_int64_t bdt_used; /**< which basic datatypes are used in the data description */
unsigned long size; /**< total size in bytes of the memory used by the data if
* the data is put on a contiguous buffer */
u_int32_t align; /**< data should be aligned to */
long true_lb;
long true_ub; /**< the true ub of the data without user defined lb and ub */
long lb; /**< lower bound in memory */
long ub; /**< upper bound in memory */
u_int16_t flags; /**< the flags */
u_int16_t id; /**< data id, normally the index in the data array. */
u_int32_t nbElems; /**< total number of elements inside the datatype */
u_int64_t bdt_used; /**< which basic datatypes are used in the data description */
/* Attribute fields */
ompi_hash_table_t *d_keyhash;
int d_f_to_c_index;
char name[MPI_MAX_OBJECT_NAME];
dt_type_desc_t desc; /**< the data description */
dt_type_desc_t opt_desc; /**< short description of the data used when conversion is useless
* or in the send case (without conversion) */
void* args; /**< data description for the user */
size_t d_f_to_c_index;
char name[MPI_MAX_OBJECT_NAME];
dt_type_desc_t desc; /**< the data description */
dt_type_desc_t opt_desc; /**< short description of the data used when conversion is useless
* or in the send case (without conversion) */
void* args; /**< data description for the user */
/* basic elements count used to compute the size of the datatype for
* remote nodes */
u_int32_t btypes[DT_MAX_PREDEFINED];
u_int32_t btypes[DT_MAX_PREDEFINED];
} dt_desc_t, ompi_datatype_t;
OBJ_CLASS_DECLARATION( ompi_datatype_t );
@ -174,8 +174,8 @@ struct ompi_convertor_t {
dt_stack_t* pStack;
/* the convertor functions pointer */
/* the local stack for the actual conversion */
int32_t converted; /* the number of already converted elements */
int32_t bConverted; /* the size of already converted elements in bytes */
u_int32_t converted; /* the number of already converted elements */
u_int32_t bConverted; /* the size of already converted elements in bytes */
u_int32_t flags;
u_int32_t count;
u_int32_t stack_pos;

Просмотреть файл

@ -80,29 +80,29 @@
#define DT_INCREASE_STACK 32
struct __dt_stack {
int32_t index;
int32_t count;
int32_t end_loop;
long disp;
int32_t index; /**< index in the element description */
int32_t count; /**< number of times we still have to do it */
int32_t end_loop; /**< for loops the end of the loop, otherwise useless */
long disp; /**< actual displacement depending on the count field */
};
/* These 2 typedefs are the same as the dt_elem_desc_t except
* for the name of the fields.
*/
typedef struct __dt_loop_desc {
u_int16_t flags; /**< flags for the record */
u_int16_t type; /**< the basic data type id */
u_int32_t count; /**< number of elements */
long disp; /**< displacement of the first element */
u_int32_t extent; /**< extent of each element */
u_int16_t flags; /**< flags for the record */
u_int16_t type; /**< the basic data type id */
u_int32_t loops; /**< number of times the loop have to be done */
long items; /**< number of items in the loop */
u_int32_t extent; /**< extent of the whole loop */
} dt_loop_desc_t;
typedef struct __dt_endloop_desc {
u_int16_t flags; /**< flags for the record */
u_int16_t type; /**< the basic data type id */
u_int32_t count; /**< number of elements */
long disp; /**< displacement of the first element */
u_int32_t extent; /**< extent of each element */
u_int16_t flags; /**< flags for the record */
u_int16_t type; /**< the basic data type id */
u_int32_t items; /**< number of items in the loop */
long total_extent; /**< total extent of the loop taking in account the repetitions */
u_int32_t size; /**< real size of the data in the loop */
} dt_endloop_desc_t;
/* keep the last 16 bits free for data flags */
@ -176,6 +176,30 @@ do { \
/*printf( "memcpy dest = %p src = %p length = %d\n", (void*)(DST), (void*)(SRC), (int)(BLENGTH) );*/ \
memcpy( (DST), (SRC), (BLENGTH) ); }
#define OMPI_DDT_SAFEGUARD_POINTER( ACTPTR, LENGTH, INITPTR, PDATA, COUNT ) \
ompi_ddt_safeguard_pointer( (ACTPTR), (LENGTH), (INITPTR), (PDATA), (COUNT) )
/**
 * Debugging aid: check that a memory access stays inside the region
 * described by a datatype.
 *
 * The region is the one covered by count instances of pData laid out
 * from initial_ptr. Consecutive instances are separated by the extent
 * (ub - lb), and the data of each instance lives between true_lb and
 * true_ub, so the valid range is
 *   [initial_ptr + true_lb,
 *    initial_ptr + (count - 1) * (ub - lb) + true_ub].
 *
 * When the access [actual_ptr, actual_ptr + length) is not fully inside
 * this range a diagnostic is printed and the datatype is dumped; the
 * function never aborts.
 *
 * @param actual_ptr   first byte about to be accessed
 * @param length       number of bytes about to be accessed
 * @param initial_ptr  base address the datatype displacements refer to
 * @param pData        datatype describing the memory layout
 * @param count        number of instances of pData at initial_ptr
 */
static inline void ompi_ddt_safeguard_pointer( void* actual_ptr, int length,
                                               void* initial_ptr,
                                               ompi_datatype_t* pData,
                                               int count )
{
    char* lower_bound = (char*)initial_ptr;
    char* upper_bound = (char*)initial_ptr;

    /* Nothing is accessed, so nothing can be out of bounds. */
    if( (length == 0) || (count == 0) ) return;

    /* The real data starts at true_lb; a user defined lb only changes the
     * extent, not where the bytes are.
     */
    lower_bound += pData->true_lb;
    /* Skip (count - 1) full extents, then add the end of the last instance. */
    upper_bound += (pData->ub - pData->lb) * (count - 1) + pData->true_ub;

    if( ((char*)actual_ptr >= lower_bound) &&
        (((char*)actual_ptr + length) <= upper_bound) ) {
        return;  /* the access is fully inside the datatype memory */
    }

    /* %p requires void* arguments, hence the explicit casts. */
    printf( "Pointer %p size %d is outside [%p,%p] for data \n", actual_ptr, length,
            (void*)lower_bound, (void*)upper_bound );
    ompi_ddt_dump( pData );
}
#ifdef USELESS
#define MEMCPY_LIMIT 1
@ -207,16 +231,35 @@ do { \
} while(0)
#endif /* USELESS */
/**
 * Return the number of consecutive DT_LOOP entries found at the start of
 * the description pointed to by _pElem, i.e. the index (relative to _pElem)
 * of the first entry whose type is not DT_LOOP.
 *
 * No bound check is done while scanning: every datatype description is
 * terminated by an END_LOOP entry, which stops the search.
 */
static inline int GET_FIRST_NON_LOOP( dt_elem_desc_t* _pElem )
{
    int skipped;

    for( skipped = 0; DT_LOOP == _pElem[skipped].type; skipped++ ) {
        /* keep walking over the nested loop headers */
    }
    return skipped;
}
static inline
int ompi_convertor_create_stack_at_begining( ompi_convertor_t* pConvertor, int* sizes )
{
ompi_datatype_t* pData = pConvertor->pDesc;
dt_elem_desc_t* pElems;
int index;
pConvertor->stack_pos = 2;
pConvertor->stack_pos = 1;
/* Fill the first position on the stack. This one correspond to the
* last fake DT_END_LOOP that we add to the data representation and
* allow us to move quickly inside the datatype when we have a count.
*/
pConvertor->pStack[0].index = -1;
pConvertor->pStack[0].count = pConvertor->count;
pConvertor->pStack[0].disp = 0;
index = GET_FIRST_NON_LOOP(pData->desc.desc);
pConvertor->pStack[0].disp = pElems[index].disp;
/* first here we should select which data representation will be used for
* this operation: normal one or the optimized version ? */
if( pData->opt_desc.used > 0 ) {
@ -228,13 +271,8 @@ int ompi_convertor_create_stack_at_begining( ompi_convertor_t* pConvertor, int*
}
pConvertor->pStack[1].index = 0;
pConvertor->pStack[1].count = pElems->count;
pConvertor->pStack[1].disp = pElems->disp;
pConvertor->pStack[1].disp = pConvertor->pStack[0].disp;
pConvertor->pStack[1].end_loop = pConvertor->pStack[0].end_loop;
/* always fill with ZEROS on the begining */
pConvertor->pStack[2].index = 0;
pConvertor->pStack[2].count = 0;
pConvertor->pStack[2].disp = 0;
pConvertor->pStack[2].end_loop = 0;
/* And set the correct status */
pConvertor->converted = 0;
pConvertor->bConverted = 0;

Просмотреть файл

@ -19,7 +19,7 @@ int mpich_typeub( void )
int blens[2];
dt_desc_t *type1, *type2, *type3, *types[2];
ompi_ddt_create_vector( 2, 1, 4, &(basicDatatypes[DT_INT]), &type1 );
ompi_ddt_create_vector( 2, 1, 4, ompi_ddt_basicDatatypes[DT_INT], &type1 );
ompi_ddt_commit( &type1 );
ompi_ddt_get_extent( type1, &lb, &extent );
extent1 = 5 * sizeof(int);
@ -34,7 +34,7 @@ int mpich_typeub( void )
displ[0]=0;
displ[1]=sizeof(int)*4;
types[0]=type1;
types[1]=&(basicDatatypes[DT_UB]);
types[1]=ompi_ddt_basicDatatypes[DT_UB];
extent2 = displ[1];
/* using MPI_UB and Type_struct, monkey with the extent, making it 16
@ -54,7 +54,7 @@ int mpich_typeub( void )
*/
displ[1]=sizeof(int);
types[0]=type2;
types[1]=&(basicDatatypes[DT_UB]);
types[1]=ompi_ddt_basicDatatypes[DT_UB];
extent3 = extent2;
ompi_ddt_create_struct( 2, blens, displ, types, &type3 );
@ -88,9 +88,9 @@ int mpich_typeub2( void )
disp[0] = -3;
disp[1] = 0;
disp[2] = 6;
types[0] = &(basicDatatypes[DT_LB]);
types[1] = &(basicDatatypes[DT_INT]);
types[2] = &(basicDatatypes[DT_UB]);
types[0] = ompi_ddt_basicDatatypes[DT_LB];
types[1] = ompi_ddt_basicDatatypes[DT_INT];
types[2] = ompi_ddt_basicDatatypes[DT_UB];
ompi_ddt_create_struct(3,blocklen,disp, types,&dt1);
ompi_ddt_commit(&dt1);
@ -167,9 +167,9 @@ int mpich_typeub3( void )
disp[0] = -3;
disp[1] = 0;
disp[2] = 6;
types[0] = &(basicDatatypes[DT_LB]);
types[1] = &(basicDatatypes[DT_INT]);
types[2] = &(basicDatatypes[DT_UB]);
types[0] = ompi_ddt_basicDatatypes[DT_LB];
types[1] = ompi_ddt_basicDatatypes[DT_INT];
types[2] = ompi_ddt_basicDatatypes[DT_UB];
/* Generate samples for contiguous, hindexed, hvector, indexed, and vector (struct and contiguous tested in typeub2) */
ompi_ddt_create_struct(3,blocklen,disp, types,&dt1);
@ -309,7 +309,7 @@ dt_desc_t* upper_matrix( unsigned int mat_size )
blocklen[i] = mat_size - i;
}
ompi_ddt_create_indexed( mat_size, blocklen, disp, &(basicDatatypes[DT_DOUBLE]),
ompi_ddt_create_indexed( mat_size, blocklen, disp, ompi_ddt_basicDatatypes[DT_DOUBLE],
&upper );
free( disp );
free( blocklen );
@ -330,7 +330,7 @@ dt_desc_t* lower_matrix( unsigned int mat_size )
blocklen[i] = i;
}
ompi_ddt_create_indexed( mat_size, blocklen, disp, &(basicDatatypes[DT_DOUBLE]),
ompi_ddt_create_indexed( mat_size, blocklen, disp, ompi_ddt_basicDatatypes[DT_DOUBLE],
&upper );
free( disp );
free( blocklen );
@ -415,7 +415,7 @@ dt_desc_t* test_matrix_borders( unsigned int size, unsigned int width )
disp[1] = (size - width) * sizeof(double);
blocklen[1] = width;
ompi_ddt_create_indexed( 2, blocklen, disp, &(basicDatatypes[DT_DOUBLE]),
ompi_ddt_create_indexed( 2, blocklen, disp, ompi_ddt_basicDatatypes[DT_DOUBLE],
&pdt_line );
ompi_ddt_create_contiguous( size, pdt_line, &pdt );
OBJ_RELEASE( pdt_line ); assert( pdt_line == NULL );
@ -428,9 +428,9 @@ dt_desc_t* test_contiguous( void )
printf( "test contiguous (alignement)\n" );
pdt1 = ompi_ddt_create( -1 );
ompi_ddt_add( pdt1, &(basicDatatypes[DT_DOUBLE]), 1, 0, -1 );
ompi_ddt_add( pdt1, ompi_ddt_basicDatatypes[DT_DOUBLE], 1, 0, -1 );
ompi_ddt_dump( pdt1 );
ompi_ddt_add( pdt1, &(basicDatatypes[DT_CHAR]), 1, 8, -1 );
ompi_ddt_add( pdt1, ompi_ddt_basicDatatypes[DT_CHAR], 1, 8, -1 );
ompi_ddt_dump( pdt1 );
ompi_ddt_create_contiguous( 4, pdt1, &pdt2 );
OBJ_RELEASE( pdt1 ); assert( pdt1 == NULL );
@ -443,17 +443,17 @@ dt_desc_t* test_contiguous( void )
dt_desc_t* test_struct( void )
{
dt_desc_t* types[] = { &(basicDatatypes[DT_FLOAT]),
dt_desc_t* types[] = { ompi_ddt_basicDatatypes[DT_FLOAT],
NULL,
&(basicDatatypes[DT_CHAR]) };
ompi_ddt_basicDatatypes[DT_CHAR] };
int lengths[] = { 2, 1, 3 };
long disp[] = { 0, 16, 26 };
dt_desc_t* pdt, *pdt1;
printf( "test struct\n" );
pdt1 = ompi_ddt_create( -1 );
ompi_ddt_add( pdt1, &(basicDatatypes[DT_DOUBLE]), 1, 0, -1 );
ompi_ddt_add( pdt1, &(basicDatatypes[DT_CHAR]), 1, 8, -1 );
ompi_ddt_add( pdt1, ompi_ddt_basicDatatypes[DT_DOUBLE], 1, 0, -1 );
ompi_ddt_add( pdt1, ompi_ddt_basicDatatypes[DT_CHAR], 1, 8, -1 );
ompi_ddt_dump( pdt1 );
types[1] = pdt1;
@ -483,14 +483,14 @@ dt_desc_t* create_strange_dt( void )
{
sdata_intern v[2];
long displ[3];
dt_desc_t* types[3] = { &(basicDatatypes[DT_INT]) };
dt_desc_t* types[3] = { ompi_ddt_basicDatatypes[DT_INT] };
sstrange t[2];
int pBlock[3] = {1, 10, 1}, dispi[3];
dt_desc_t *pdt, *pdt1, *pdt2, *pdtTemp;
dispi[0] = (int)((char*)&(v[0].i1) - (char*)&(v[0])); /* 0 */
dispi[1] = (int)(((char*)(&(v[0].i2)) - (char*)&(v[0])) / sizeof(int)); /* 2 */
ompi_ddt_create_indexed_block( 2, 1, dispi, &(basicDatatypes[DT_INT]), &pdtTemp );
ompi_ddt_create_indexed_block( 2, 1, dispi, ompi_ddt_basicDatatypes[DT_INT], &pdtTemp );
#ifdef USE_RESIZED
/* optional */
displ[0] = 0;
@ -502,7 +502,7 @@ dt_desc_t* create_strange_dt( void )
#endif /* USE_RESIZED */
types[1] = pdt1;
types[2] = &(basicDatatypes[DT_INT]);
types[2] = ompi_ddt_basicDatatypes[DT_INT];
displ[0] = 0;
displ[1] = (long)((char*)&(t[0].v[0]) - (char*)&(t[0]));
displ[2] = (long)((char*)&(t[0].last) - (char*)&(t[0]));
@ -544,6 +544,7 @@ int local_copy_ddt_count( dt_desc_t* pdt, int count )
OBJ_RELEASE( pdt ); assert( pdt == NULL );
return OMPI_SUCCESS;
}
int main( int argc, char* argv[] )
{
dt_desc_t *pdt, *pdt1, *pdt2, *pdt3;
@ -551,57 +552,57 @@ int main( int argc, char* argv[] )
ompi_ddt_init();
pdt = create_strange_dt();
OBJ_RELEASE( pdt ); assert( pdt == NULL );
/* pdt = create_strange_dt(); */
/* OBJ_RELEASE( pdt ); assert( pdt == NULL ); */
pdt = upper_matrix(100);
local_copy_ddt_count(pdt, 1);
OBJ_RELEASE( pdt ); assert( pdt == NULL );
/* pdt = upper_matrix(100); */
/* local_copy_ddt_count(pdt, 1); */
/* OBJ_RELEASE( pdt ); assert( pdt == NULL ); */
mpich_typeub();
mpich_typeub2();
mpich_typeub3();
/* mpich_typeub(); */
/* mpich_typeub2(); */
/* mpich_typeub3(); */
rc = test_upper( length );
if( rc == 0 )
printf( "decode [PASSED]\n" );
else
printf( "decode [NOT PASSED]\n" );
printf( "decode [NOT PASSED]\n" );
pdt = test_matrix_borders( length, 100 );
ompi_ddt_dump( pdt );
OBJ_RELEASE( pdt ); assert( pdt == NULL );
/* pdt = test_matrix_borders( length, 100 ); */
/* ompi_ddt_dump( pdt ); */
/* OBJ_RELEASE( pdt ); assert( pdt == NULL ); */
printf( ">>--------------------------------------------<<\n" );
pdt = test_contiguous();
OBJ_RELEASE( pdt ); assert( pdt == NULL );
printf( ">>--------------------------------------------<<\n" );
pdt = test_struct();
OBJ_RELEASE( pdt ); assert( pdt == NULL );
printf( ">>--------------------------------------------<<\n" );
/* printf( ">>--------------------------------------------<<\n" ); */
/* pdt = test_contiguous(); */
/* OBJ_RELEASE( pdt ); assert( pdt == NULL ); */
/* printf( ">>--------------------------------------------<<\n" ); */
/* pdt = test_struct(); */
/* OBJ_RELEASE( pdt ); assert( pdt == NULL ); */
/* printf( ">>--------------------------------------------<<\n" ); */
pdt1 = ompi_ddt_create( -1 );
pdt2 = ompi_ddt_create( -1 );
pdt3 = ompi_ddt_create( -1 );
ompi_ddt_add( pdt3, &(basicDatatypes[DT_INT]), 10, 0, -1 );
ompi_ddt_add( pdt3, &(basicDatatypes[DT_FLOAT]), 5, 10 * sizeof(int), -1 );
/* pdt1 = ompi_ddt_create( -1 ); */
/* pdt2 = ompi_ddt_create( -1 ); */
/* pdt3 = ompi_ddt_create( -1 ); */
/* ompi_ddt_add( pdt3, ompi_ddt_basicDatatypes[DT_INT], 10, 0, -1 ); */
/* ompi_ddt_add( pdt3, ompi_ddt_basicDatatypes[DT_FLOAT], 5, 10 * sizeof(int), -1 ); */
ompi_ddt_add( pdt2, &(basicDatatypes[DT_INT]), 1, 0, -1 );
ompi_ddt_add( pdt2, pdt3, 3, sizeof(int) * 1, -1 );
/* ompi_ddt_add( pdt2, ompi_ddt_basicDatatypes[DT_INT], 1, 0, -1 ); */
/* ompi_ddt_add( pdt2, pdt3, 3, sizeof(int) * 1, -1 ); */
ompi_ddt_add( pdt1, &(basicDatatypes[DT_LONG_LONG]), 5, 0, -1 );
ompi_ddt_add( pdt1, &(basicDatatypes[DT_LONG_DOUBLE]), 2, sizeof(long long) * 5, -1 );
/* ompi_ddt_add( pdt1, ompi_ddt_basicDatatypes[DT_LONG_LONG], 5, 0, -1 ); */
/* ompi_ddt_add( pdt1, ompi_ddt_basicDatatypes[DT_LONG_DOUBLE], 2, sizeof(long long) * 5, -1 ); */
printf( ">>--------------------------------------------<<\n" );
ompi_ddt_dump( pdt1 );
printf( ">>--------------------------------------------<<\n" );
ompi_ddt_dump( pdt2 );
printf( ">>--------------------------------------------<<\n" );
ompi_ddt_dump( pdt3 );
/* printf( ">>--------------------------------------------<<\n" ); */
/* ompi_ddt_dump( pdt1 ); */
/* printf( ">>--------------------------------------------<<\n" ); */
/* ompi_ddt_dump( pdt2 ); */
/* printf( ">>--------------------------------------------<<\n" ); */
/* ompi_ddt_dump( pdt3 ); */
OBJ_RELEASE( pdt1 ); assert( pdt1 == NULL );
OBJ_RELEASE( pdt2 ); assert( pdt2 == NULL );
OBJ_RELEASE( pdt3 ); assert( pdt3 == NULL );
/* OBJ_RELEASE( pdt1 ); assert( pdt1 == NULL ); */
/* OBJ_RELEASE( pdt2 ); assert( pdt2 == NULL ); */
/* OBJ_RELEASE( pdt3 ); assert( pdt3 == NULL ); */
/* clean-ups all data allocations */
ompi_ddt_finalize();

Просмотреть файл

@ -82,7 +82,7 @@ int ompi_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd,
pdtBase->desc.used++;
pdtBase->btypes[pdtAdd->id] += count;
pLast->flags = pdtAdd->flags & ~(DT_FLAG_FOREVER | DT_FLAG_COMMITED | DT_FLAG_CONTIGUOUS);
if( extent == pdtAdd->size )
if( extent == (int)pdtAdd->size )
pLast->flags |= DT_FLAG_CONTIGUOUS;
} else {
/* now we add a complex datatype */
@ -91,9 +91,9 @@ int ompi_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd,
}
/* keep trace of the total number of basic datatypes in the datatype definition */
pdtBase->btypes[DT_LOOP] += pdtAdd->btypes[DT_LOOP];
for( i = 3; i < DT_MAX_PREDEFINED; i++ )
if( pdtAdd->btypes[i] != 0 ) pdtBase->btypes[i] += (count * pdtAdd->btypes[i]);
pdtBase->btypes[DT_END_LOOP] += pdtAdd->btypes[DT_END_LOOP];
for( i = 4; i < DT_MAX_PREDEFINED; i++ )
if( pdtAdd->btypes[i] != 0 ) pdtBase->btypes[i] += (count * pdtAdd->btypes[i]);
/* if the extent of the datatype if the same as the extent of the loop
* description of the datatype then we simply have to update the main loop.
@ -118,7 +118,7 @@ int ompi_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd,
pLast->extent = pdtAdd->desc.desc[i].extent;
pLast->disp = pdtAdd->desc.desc[i].disp;
if( pdtAdd->desc.desc[i].type != DT_LOOP )
pLast->disp += disp/* + pdtAdd->lb */;
pLast->disp += disp /* + pdtAdd->lb */;
pLast++;
}
pdtBase->desc.used += pdtAdd->desc.used;
@ -183,7 +183,7 @@ int ompi_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd,
* The only way for the data to be contiguous is to have the true extent equal to his size.
* In other words to avoid having internal gaps between elements.
*/
if( (pdtBase->size != (pdtBase->true_ub - pdtBase->true_lb)) ||
if( ((int)pdtBase->size != (pdtBase->true_ub - pdtBase->true_lb)) ||
!(pdtBase->flags & DT_FLAG_CONTIGUOUS) || !(pdtAdd->flags & DT_FLAG_CONTIGUOUS) )
UNSET_CONTIGUOUS_FLAG(pdtBase->flags);

Просмотреть файл

@ -9,10 +9,10 @@
0, 0, 0, 0, 0, 0, 0, 0, 0 }
#define EMPTY_DATA(NAME) NULL, 0, "MPI_" # NAME, {0, 0, NULL}, {0, 0, NULL}, NULL, ZERO_DDT_ARRAY
#define BASEOBJ_DATA { OBJ_CLASS(ompi_datatype_t), 1 }
#define INIT_BASIC_DATA( TYPE, ALIGN, NAME ) \
{ BASEOBJ_DATA, sizeof(TYPE), 0, sizeof(TYPE), ALIGN, \
0, sizeof(TYPE), DT_FLAG_BASIC | DT_FLAG_DATA, DT_##NAME, 1, \
(((unsigned long long)1)<<(DT_##NAME)), EMPTY_DATA(NAME) }
#define INIT_BASIC_DATA( TYPE, ALIGN, NAME ) \
{ BASEOBJ_DATA, sizeof(TYPE), ALIGN, 0, sizeof(TYPE), \
0, sizeof(TYPE), DT_FLAG_BASIC | DT_FLAG_DATA, DT_##NAME, 1, \
(((unsigned long long)1)<<(DT_##NAME)), EMPTY_DATA(NAME) }
/* Using this macro implies that at this point not all informations needed
* to fill up the datatype are known. We fill them with zeros and then later
* when the datatype engine will be initialized we complete with the
@ -494,7 +494,7 @@ void ompi_ddt_dump( dt_desc_t* data )
{
dt_desc_t* pData = (dt_desc_t*)data;
printf( "Datatype %p size %d align %d id %d length %d used %d\n\
printf( "Datatype %p size %ld align %d id %d length %d used %d\n\
true_lb %ld true_ub %ld (true_extent %ld) lb %ld ub %ld (extent %ld)\n\
nbElems %d loops %d flags %X (",
(void*)pData, pData->size, pData->align, pData->id, pData->desc.length, pData->desc.used,

Просмотреть файл

@ -31,12 +31,6 @@ do { \
nbElems++; \
} while(0)
static inline long GET_LOOP_DISP( dt_elem_desc_t* _pElem )
{
while( _pElem->type == DT_LOOP ) ++_pElem;
return _pElem->disp;
}
int ompi_ddt_optimize_short( dt_desc_t* pData, int count,
dt_type_desc_t* pTypeDesc )
{
@ -64,42 +58,45 @@ int ompi_ddt_optimize_short( dt_desc_t* pData, int count,
while( stack_pos >= 0 ) {
if( pData->desc.desc[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
dt_elem_desc_t* pStartLoop;
dt_loop_desc_t* pStartLoop;
if( lastLength != 0 ) {
SAVE_DESC( pElemDesc, lastDisp, lastLength );
lastDisp += lastLength;
lastLength = 0;
}
pStartLoop = &(pTypeDesc->desc[pStack->index - 1]);
pStartLoop = (dt_loop_desc_t*)&(pTypeDesc->desc[pStack->index - 1]);
SAVE_ELEM( pElemDesc, DT_END_LOOP, pData->desc.desc[pos_desc].flags,
nbElems - pStack->index + 1, /* # of elems in this loop */
pData->desc.desc[pos_desc].disp,
pData->desc.desc[pos_desc].extent );
pStack--; /* go down one position on the stack */
if( --stack_pos >= 0 ) { /* still something to do ? */
pStartLoop->disp = (pElemDesc - 1)->count;
pStartLoop->loops = (pElemDesc - 1)->count;
totalDisp = pStack->disp; /* update the displacement position */
}
pos_desc++;
continue;
}
if( pData->desc.desc[pos_desc].type == DT_LOOP ) {
dt_elem_desc_t* pEndLoop = &(pData->desc.desc[pos_desc + pData->desc.desc[pos_desc].disp]);
long loop_disp = GET_LOOP_DISP( &(pData->desc.desc[pos_desc]) );
if( pData->desc.desc[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
dt_loop_desc_t* loop = (dt_loop_desc_t*)&(pData->desc.desc[pos_desc]);
dt_endloop_desc_t* end_loop = (dt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items + 1]);
int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) );
long loop_disp = pData->desc.desc[pos_desc + index].disp;
if( loop->flags & DT_FLAG_CONTIGUOUS ) {
/* the loop is contiguous or composed by contiguous elements with a gap */
if( pData->desc.desc[pos_desc].extent == pEndLoop->extent ) {
if( loop->extent == end_loop->size ) {
/* the whole loop is contiguous */
if( (lastDisp + lastLength) != (totalDisp + loop_disp) ) {
SAVE_DESC( pElemDesc, lastDisp, lastLength );
lastLength = 0;
lastDisp = totalDisp + loop_disp;
}
lastLength += pData->desc.desc[pos_desc].count * pEndLoop->extent;
lastLength += loop->loops * end_loop->size;
} else {
int counter = pData->desc.desc[pos_desc].count;
int counter = loop->loops;
if( (lastDisp + lastLength) == (totalDisp + loop_disp) ) {
lastLength += pEndLoop->extent;
lastLength += end_loop->size;
counter--;
}
if( lastLength != 0 ) {
@ -112,9 +109,9 @@ int ompi_ddt_optimize_short( dt_desc_t* pData, int count,
*/
SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags,
counter, (long)2, pData->desc.desc[pos_desc].extent );
SAVE_DESC( pElemDesc, loop_disp, pEndLoop->extent );
SAVE_ELEM( pElemDesc, DT_END_LOOP, pEndLoop->flags,
2, pEndLoop->disp, pEndLoop->extent );
SAVE_DESC( pElemDesc, loop_disp, end_loop->size );
SAVE_ELEM( pElemDesc, DT_END_LOOP, end_loop->flags,
2, end_loop->total_extent, end_loop->size );
}
pos_desc += pData->desc.desc[pos_desc].disp + 1;
changes++;
@ -283,7 +280,7 @@ static int ompi_ddt_unroll( dt_desc_t* pData, int count )
int ompi_ddt_commit( dt_desc_t** data )
{
dt_desc_t* pData = (dt_desc_t*)*data;
dt_elem_desc_t* pLast = &(pData->desc.desc[pData->desc.used]);
dt_endloop_desc_t* pLast = (dt_endloop_desc_t*)&(pData->desc.desc[pData->desc.used]);
if( pData->flags & DT_FLAG_COMMITED ) return OMPI_SUCCESS;
pData->flags |= DT_FLAG_COMMITED;
@ -291,24 +288,24 @@ int ompi_ddt_commit( dt_desc_t** data )
/* let's add a fake element at the end just to avoid useless comparaisons
* in pack/unpack functions.
*/
pLast->type = DT_END_LOOP;
pLast->flags = 0;
pLast->count = pData->desc.used;
pLast->disp = pData->ub - pData->lb;
pLast->extent = pData->size;
pLast->type = DT_END_LOOP;
pLast->flags = 0;
pLast->items = pData->desc.used;
pLast->total_extent = pData->ub - pData->lb;
pLast->size = pData->size;
/* If the data is contiguous is useless to generate an optimized version. */
if( pData->size != (pData->true_ub - pData->true_lb) ) {
if( (long)pData->size != (pData->true_ub - pData->true_lb) ) {
(void)ompi_ddt_optimize_short( pData, 1, &(pData->opt_desc) );
/* let's add a fake element at the end just to avoid useless comparaisons
* in pack/unpack functions.
*/
pLast = &(pData->opt_desc.desc[pData->opt_desc.used]);
pLast->type = DT_END_LOOP;
pLast->flags = 0;
pLast->count = pData->opt_desc.used;
pLast->disp = pData->ub - pData->lb;
pLast->extent = pData->size;
pLast = (dt_endloop_desc_t*)&(pData->opt_desc.desc[pData->opt_desc.used]);
pLast->type = DT_END_LOOP;
pLast->flags = 0;
pLast->items = pData->opt_desc.used;
pLast->total_extent = pData->ub - pData->lb;
pLast->size = pData->size;
}
return OMPI_SUCCESS;
}

Просмотреть файл

@ -23,7 +23,6 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
unsigned int advance; /* number of bytes that we should advance the buffer */
int rc;
long disp_desc = 0; /* compute displacement for truncated data */
long disp; /* displacement at the beging of the last loop */
int bConverted = 0; /* number of bytes converted this time */
dt_desc_t *pData = pConvertor->pDesc;
dt_elem_desc_t* pElem;
@ -34,40 +33,29 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", pConvertor,
iov[0].iov_base, iov[0].iov_len, *out_size );
pStack = pConvertor->pStack + pConvertor->stack_pos;
pos_desc = pStack->index;
disp = 0;
if( pData->opt_desc.desc != NULL ) pElem = pData->opt_desc.desc;
else pElem = pData->desc.desc;
if( pos_desc == -1 ) {
pos_desc = 0;
count_desc = pElem[0].count;
disp_desc = pElem[0].disp;
} else {
count_desc = pStack->count;
if( pElem[pos_desc].type != DT_LOOP ) {
pConvertor->stack_pos--;
pStack--;
disp = pStack->disp;
disp_desc = ( pElem[pos_desc].disp +
(pElem[pos_desc].count - count_desc) * pElem[pos_desc].extent);
}
}
pStack = pConvertor->pStack + pConvertor->stack_pos;
pos_desc = pStack->index;
disp_desc = pStack->disp;
count_desc = pStack->count;
pStack--;
pConvertor->stack_pos--;
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", pConvertor->stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n",
pStack->index, pStack->count );
next_loop:
while( pos_desc >= 0 ) {
if( pElem[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
if( pConvertor->stack_pos == 0 )
goto complete_loop; /* completed */
pConvertor->stack_pos--;
pStack--;
if( pConvertor->stack_pos == -1 )
return 1; /* completed */
}
pos_desc = pStack->index;
if( pos_desc == -1 )
@ -75,42 +63,37 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
else
pStack->disp += pElem[pos_desc].extent;
pos_desc++;
disp = pStack->disp;
count_desc = pElem[pos_desc].count;
disp_desc = pElem[pos_desc].disp;
goto next_loop;
continue;
}
if( pElem[pos_desc].type == DT_LOOP ) {
do {
PUSH_STACK( pStack, pConvertor->stack_pos,
pos_desc, pElem[pos_desc].count,
disp, pos_desc + pElem[pos_desc].disp + 1);
pStack->disp, pos_desc + pElem[pos_desc].disp + 1);
pos_desc++;
} while( pElem[pos_desc].type == DT_LOOP ); /* let's start another loop */
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loops" );
/* update the current state */
count_desc = pElem[pos_desc].count;
disp_desc = pElem[pos_desc].disp;
goto next_loop;
continue;
}
while( pElem[pos_desc].flags & DT_FLAG_DATA ) {
/* now here we have a basic datatype */
type = pElem[pos_desc].type;
rc = pConvertor->pFunctions[type]( count_desc,
pOutput + disp + disp_desc, oCount, pElem[pos_desc].extent,
pOutput + pStack->disp + disp_desc, oCount, pElem[pos_desc].extent,
pInput, iCount, pElem[pos_desc].extent,
&advance );
if( rc <= 0 ) {
printf( "trash in the input buffer\n" );
return -1;
}
iCount -= advance; /* decrease the available space in the buffer */
pInput += advance; /* increase the pointer to the buffer */
bConverted += advance;
if( rc != count_desc ) {
/* not all data has been converted. Keep the state */
count_desc -= rc;
disp += rc * pElem[pos_desc].extent;
disp_desc += rc * pElem[pos_desc].extent;
if( iCount != 0 )
printf( "there is still room in the input buffer %d bytes\n", iCount );
goto complete_loop;
@ -133,7 +116,7 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
/* I complete an element, next step I should go to the next one */
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, count_desc,
disp, pos_desc );
disp_desc, pos_desc );
return (pConvertor->bConverted == (pData->size * pConvertor->count));
}
@ -148,12 +131,12 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
int* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
u_int32_t pos_desc; /* actual position in the description of the derived datatype */
int type; /* type at current position */
int i; /* index for basic elements with extent */
int bConverted = 0; /* number of bytes converted/moved this time */
long lastDisp = 0, last_count = 0;
int space = iov[0].iov_len, last_blength = 0;
u_int32_t space = iov[0].iov_len, last_blength = 0;
char* pDestBuf;
dt_desc_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems;
@ -161,20 +144,22 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
pDestBuf = iov[0].iov_base;
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
long extent = pData->ub - pData->lb;
long true_extent = pData->true_ub - pData->true_lb;
char* pSrcBuf = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
type = pConv->count * pData->size;
if( pData->size == extent /* true extent at this point */ ) {
if( (long)pData->size == true_extent ) {
/* we can do it with just one memcpy */
OMPI_DDT_SAFEGUARD_POINTER( pSrcBuf, iov[0].iov_len, pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDestBuf, pSrcBuf, iov[0].iov_len );
space -= iov[0].iov_len;
bConverted += iov[0].iov_len;
} else {
for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pSrcBuf, pData->size, pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDestBuf, pSrcBuf, pData->size );
space -= pData->size;
pSrcBuf += extent;
pSrcBuf += true_extent;
pDestBuf += pData->size;
}
bConverted += type;
@ -202,12 +187,13 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
while( pos_desc >= 0 ) {
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
pStack--;
if( --(pConv->stack_pos) == -1 ) { /* finish everything */
if( pConv->stack_pos == 0 ) { /* finish everything */
last_count = 0;
pos_desc = -1;
goto end_loop;
}
pStack--;
pConv->stack_pos--;
} else {
pos_desc = pStack->index; /* DT_LOOP index */
if( pos_desc == -1 )
@ -230,6 +216,8 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
last_count = space / pLast->extent;
}
for( i = 0; i < last_count; i++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, pLast->extent,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, pLast->extent );
pDestBuf += pLast->extent; /* size of the contiguous data */
lastDisp += pElems[pos_desc].extent;
@ -262,6 +250,8 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
last_blength -= last_count;
goto end_loop; /* or break whatever but go out of this while */
}
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, last_count,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, last_count );
bConverted += last_blength;
space -= last_blength;
@ -274,6 +264,8 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
last_count = 0; /* complete the data */
end_loop:
if( last_count != 0 ) { /* save the internal state */
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, last_count,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, last_count );
bConverted += last_count;
lastDisp += last_count;
@ -304,14 +296,14 @@ int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
unsigned int* max_data,
int* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int i; /* index for basic elements with extent */
int iov_pos = 0; /* index in the iovec that we put data inside */
int bConverted = 0; /* number of bytes converted/moved this time */
int space_on_iovec; /* amount of free space on the current iovec */
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int i; /* index for basic elements with extent */
u_int32_t iov_pos = 0; /* index in the iovec that we put data inside */
int bConverted = 0; /* number of bytes converted/moved this time */
u_int32_t space_on_iovec; /* amount of free space on the current iovec */
long lastDisp = 0, last_count = 0;
int space = *max_data, last_blength = 0, saveLength;
u_int32_t space = *max_data, last_blength = 0, saveLength;
char *pDestBuf, *savePos;
dt_desc_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems;
@ -342,11 +334,10 @@ int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
next_loop:
while( pos_desc >= 0 ) {
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
pStack--;
if( --(pConv->stack_pos) == -1 ) { /* finish everything */
if( saveLength != 0 ) {
if( pConv->stack_pos == 0 ) { /* finish everything */
if( saveLength != 0 ) {
/* there is still a chunk of memory to be handled, but here we dont allocate more
* memory. We just copy what we can in the right place and update the values to be
* saved on the next round.
@ -363,6 +354,8 @@ int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
/* let's go out of here */
} else {
if( space_on_iovec > saveLength ) {
OMPI_DDT_SAFEGUARD_POINTER( savePos, saveLength,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDestBuf, savePos, saveLength );
savePos += saveLength;
pDestBuf += saveLength;
@ -370,6 +363,8 @@ int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
space_on_iovec -= saveLength;
saveLength = 0;
} else {
OMPI_DDT_SAFEGUARD_POINTER( savePos, space_on_iovec,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDestBuf, savePos, space_on_iovec );
savePos += space_on_iovec;
pDestBuf += space_on_iovec;
@ -385,6 +380,8 @@ int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
pos_desc = -1;
goto end_loop;
}
pConv->stack_pos--;
pStack--;
} else {
pos_desc = pStack->index; /* DT_LOOP index */
if( pos_desc == -1 )
@ -393,13 +390,11 @@ int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
pStack->disp += pElems[pos_desc].extent;
}
pos_desc++; /* go to the next element */
last_count = pElems[pos_desc].count;
last_blength = last_count;
lastDisp = pStack->disp + pElems[pos_desc].disp;
goto next_loop;
}
while( pElems[pos_desc].type == DT_LOOP ) {
int stop_in_loop = 0;
/* If the loop container is contiguous then we can do some
* optimizations.
*/
@ -419,6 +414,8 @@ int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
}
/* Now let's do it */
for( i = 0; i < last_count; i++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, pLast->extent,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, pLast->extent );
lastDisp += pElems[pos_desc].extent;
pDestBuf += pLast->extent;
@ -442,11 +439,12 @@ int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pos_desc++;
last_count = pElems[pos_desc].count;
lastDisp = pStack->disp + pElems[pos_desc].disp;
}
}
/* now here we have a basic datatype */
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
/* do we have enough space in the buffer ? */
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
@ -492,6 +490,8 @@ int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
}
/* In all the others cases we simply copy as much data as possible */
if( space_on_iovec > saveLength ) {
OMPI_DDT_SAFEGUARD_POINTER( savePos, saveLength,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDestBuf, savePos, saveLength );
pDestBuf += saveLength;
/* update the pack counters values */
@ -502,6 +502,8 @@ int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
saveLength = last_blength;
break;
} else {
OMPI_DDT_SAFEGUARD_POINTER( savePos, space_on_iovec,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDestBuf, savePos, space_on_iovec );
/* let's prepare for the next round. As I keep trace of the amount that I still
* have to pack, the next time when I came here, I'll try to append something.
@ -532,8 +534,6 @@ int ompi_convertor_pack_homogeneous( ompi_convertor_t* pConv,
if( iov_pos == (*out_size) ) goto end_loop;
pos_desc++; /* advance to the next data */
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
}
}
last_count = 0; /* complete the data */
@ -564,15 +564,17 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
{
dt_desc_t* pData = pConv->pDesc;
char* pSrc = pConv->pBaseBuf + pData->true_lb;
dt_stack_t* pStack = &(pConv->pStack[pConv->stack_pos]);
char* pDest;
size_t length = pData->size * pConv->count;
long extent;
unsigned int max_allowed = *max_data;
int i, index;
u_int32_t max_allowed = *max_data;
u_int32_t i, index;
i = pConv->bConverted / pData->size; /* how many we already pack */
extent = pData->ub - pData->lb;
pSrc += i * extent; /* the real starting point */
pSrc = pConv->pBaseBuf + pStack->disp + pStack->count; /* actual starting point for the conversion */
*freeAfter = 0;
/* There are some optimizations that can be done if the upper level
* does not provide a buffer.
@ -587,6 +589,7 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
iov[index].iov_base = pSrc;
iov[index].iov_len = pData->size;
pSrc += extent;
pConv->bConverted += pData->size;
}
*out_size = index;
*max_data = index * pData->size;
@ -619,7 +622,7 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
}
}
if( pData->size == extent ) { /* that really contiguous */
if( (long)pData->size == extent ) { /* that really contiguous */
if( iov[0].iov_base == NULL ) {
iov[0].iov_base = pSrc; /* + pConv->bConverted; */
if( (pConv->bConverted + iov[0].iov_len) > length )
@ -627,11 +630,13 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
} else {
/* contiguous data just memcpy the smallest data in the user buffer */
iov[0].iov_len = IMIN( iov[0].iov_len, length );
OMPI_DDT_SAFEGUARD_POINTER( pSrc, iov[0].iov_len,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( iov[0].iov_base, pSrc, iov[0].iov_len);
}
*max_data = iov[0].iov_len;
} else {
int done, counter;
u_int32_t done, counter;
if( iov[0].iov_base == NULL ) {
iov[0].iov_base = pConv->memAlloc_fn( &(iov[0].iov_len) );
@ -646,6 +651,7 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
pSrc += done;
if( done != 0 ) { /* still some data to copy from the last time */
done = pData->size - done;
OMPI_DDT_SAFEGUARD_POINTER( pSrc, done, pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDest, pSrc, done );
pDest += done;
max_allowed -= done;
@ -655,6 +661,7 @@ int ompi_convertor_pack_homogeneous_contig( ompi_convertor_t* pConv,
counter = max_allowed / pData->size;
if( counter > pConv->count ) counter = pConv->count;
for( i = 0; i < counter; i++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pSrc, pData->size, pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDest, pSrc, pData->size );
pDest += pData->size;
pSrc += extent;
@ -698,7 +705,7 @@ int ompi_convertor_pack( ompi_convertor_t* pConv,
int* freeAfter )
{
dt_desc_t* pData = pConv->pDesc;
int done = 0, index = 0;
u_int32_t done = 0, index = 0;
*freeAfter = 0; /* nothing to free yet */
/* TODO should use the remote size */
@ -772,13 +779,13 @@ int ompi_convertor_init_for_send( ompi_convertor_t* pConv,
pConv->fAdvance = ompi_convertor_pack_homogeneous_contig;
} else {
/* TODO handle the sender convert case */
pConv->fAdvance = ompi_convertor_pack_general;
pConv->fAdvance = ompi_convertor_pack_homogeneous_with_memcpy;
pConv->fAdvance = ompi_convertor_pack_homogeneous;
#if defined(ONE_STEP)
pConv->fAdvance = ompi_convertor_pack_homogeneous_with_memcpy;
#endif /* ONE_STEP */
}
pConv->fAdvance = ompi_convertor_pack_general;
if( starting_pos != 0 ) {
return ompi_convertor_create_stack_with_pos( pConv, starting_pos, ompi_ddt_local_sizes );
}

Просмотреть файл

@ -55,11 +55,11 @@ int ompi_ddt_sndrcv(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf,
scount, NULL, 0, NULL );
err = ompi_convertor_get_packed_size(local_convertor, &size);
OBJ_RELEASE(local_convertor);
if (OMPI_SUCCESS != err) {
if( OMPI_SUCCESS != err ) {
return err;
}
if (size <= rcount) {
if( (int)size <= rcount ) {
err = MPI_Pack(sbuf, scount, sdtype,
rbuf, rcount, &position, MPI_COMM_WORLD);
} else {
@ -75,11 +75,11 @@ int ompi_ddt_sndrcv(void *sbuf, int scount, MPI_Datatype sdtype, void *rbuf,
rcount, NULL, 0, NULL );
err = ompi_convertor_get_packed_size(local_convertor, &size);
OBJ_RELEASE(local_convertor);
if (OMPI_SUCCESS != err) {
if( OMPI_SUCCESS != err ) {
return err;
}
if (scount <= size) {
if( scount <= (int)size ) {
err = MPI_Unpack(sbuf, scount, &position,
rbuf, rcount, rdtype,
MPI_COMM_WORLD);

Просмотреть файл

@ -51,91 +51,70 @@ static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
unsigned int advance; /* number of bytes that we should advance the buffer */
int rc;
long disp_desc = 0; /* compute displacement for truncated data */
long disp; /* displacement at the beging of the last loop */
int bConverted = 0; /* number of bytes converted this time */
dt_desc_t *pData = pConvertor->pDesc;
dt_elem_desc_t* pElems;
char* pOutput = pConvertor->pBaseBuf;
int oCount = (pData->ub - pData->lb) * pConvertor->count;
int oCount = (pConvertor->pDesc->ub - pConvertor->pDesc->lb) * pConvertor->count;
char* pInput = iov[0].iov_base;
int iCount = iov[0].iov_len;
if( pData->opt_desc.desc != NULL ) pElems = pData->opt_desc.desc;
else pElems = pData->desc.desc;
/* For the general case always use the user data description */
pElems = pConvertor->pDesc->desc.desc;
pStack = pConvertor->pStack + pConvertor->stack_pos;
pos_desc = pStack->index;
disp = 0;
if( pos_desc == -1 ) {
pos_desc = 0;
count_desc = pElems[0].count;
disp_desc = pElems[0].disp;
} else {
count_desc = pStack->count;
if( pElems[pos_desc].type != DT_LOOP ) {
pConvertor->stack_pos--;
pStack--;
disp = pStack->disp;
disp_desc = ( pElems[pos_desc].disp +
(pElems[pos_desc].count - count_desc) * pElems[pos_desc].extent);
}
}
pos_desc = pStack->index;
count_desc = pStack->count;
disp_desc = pStack->disp;
pStack--;
pConvertor->stack_pos--;
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", pConvertor->stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n",
pStack->index, pStack->count );
next_loop:
while( pos_desc >= 0 ) {
while( 1 ) {
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
if( pConvertor->stack_pos == 0 )
goto save_and_return; /* completed */
pConvertor->stack_pos--;
pStack--;
if( pConvertor->stack_pos == -1 )
return 1; /* completed */
}
pos_desc = pStack->index;
if( pos_desc == -1 )
pStack->disp += (pData->ub - pData->lb);
pStack->disp += (pConvertor->pDesc->ub - pConvertor->pDesc->lb);
else
pStack->disp += pElems[pos_desc].extent;
pos_desc++;
disp = pStack->disp;
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
goto next_loop;
}
if( pElems[pos_desc].type == DT_LOOP ) {
do {
PUSH_STACK( pStack, pConvertor->stack_pos,
pos_desc, pElems[pos_desc].count,
disp, pos_desc + pElems[pos_desc].disp + 1 );
pStack->disp, pos_desc + pElems[pos_desc].disp + 1 );
pos_desc++;
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "advance loops" );
/* update the current state */
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
goto next_loop;
}
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
/* now here we have a basic datatype */
type = pElems[pos_desc].type;
rc = pConvertor->pFunctions[type]( count_desc,
pInput, iCount, pElems[pos_desc].extent,
pOutput + disp + disp_desc, oCount, pElems[pos_desc].extent,
&advance );
if( rc <= 0 ) {
printf( "trash in the input buffer\n" );
return OMPI_ERROR;
}
pConvertor->pBaseBuf + pStack->disp + disp_desc, oCount,
pElems[pos_desc].extent, &advance );
iCount -= advance; /* decrease the available space in the buffer */
pInput += advance; /* increase the pointer to the buffer */
bConverted += advance;
if( rc != count_desc ) {
/* not all data has been converted. Keep the state */
count_desc -= rc;
disp += rc * pElems[pos_desc].extent;
disp_desc += rc * pElems[pos_desc].extent;
if( iCount != 0 )
printf( "there is still room in the input buffer %d bytes\n", iCount );
goto save_and_return;
@ -144,7 +123,8 @@ static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
pos_desc++; /* advance to the next data */
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
if( iCount == 0 ) break; /* break if there is no more data in the buffer */
if( iCount == 0 )
goto save_and_return; /* break if there is no more data in the buffer */
}
}
save_and_return:
@ -157,9 +137,9 @@ static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
/* I complete an element, next step I should go to the next one */
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
count_desc, disp, pos_desc );
count_desc, disp_desc, pos_desc );
return (pConvertor->bConverted == (pConvertor->count * pData->size));
return (pConvertor->bConverted == (pConvertor->count * pConvertor->pDesc->size));
}
static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
@ -173,7 +153,7 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
int i; /* counter for basic datatype with extent */
int bConverted = 0; /* number of bytes converted this time */
long lastDisp = 0, last_count = 0;
int space = iov[0].iov_len, last_blength = 0;
size_t space = iov[0].iov_len, last_blength = 0;
char* pSrcBuf;
dt_desc_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems;
@ -198,12 +178,13 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
while( pos_desc >= 0 ) {
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
pStack--;
if( --(pConv->stack_pos) == -1 ) {
if( pConv->stack_pos == 0 ) {
last_count = 0;
pos_desc = -1;
goto end_loop;
}
pStack--;
pConv->stack_pos--;
} else {
pos_desc = pStack->index;
if( pos_desc == -1 )
@ -212,9 +193,6 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
pStack->disp += pElems[pos_desc].extent;
}
pos_desc++;
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
last_blength = last_count;
goto next_loop;
}
while( pElems[pos_desc].type == DT_LOOP ) {
@ -227,6 +205,8 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
last_count = space / pLast->extent;
}
for( i = 0; i < last_count; i++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, pLast->extent,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, pLast->extent );
pSrcBuf += pLast->extent;
lastDisp += pElems[pos_desc].extent;
@ -245,11 +225,12 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pos_desc++;
last_count = pElems[pos_desc].count;
lastDisp = pStack->disp + pElems[pos_desc].disp;
}
/* now here we have a basic datatype */
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
/* do we have enough space in the buffer ? */
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
if( space < last_blength ) {
@ -259,18 +240,20 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
last_blength -= last_count;
goto end_loop; /* or break whatever but go out of this while */
}
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, last_blength,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, last_blength );
bConverted += last_blength;
space -= last_blength;
pSrcBuf += last_blength;
pos_desc++; /* advance to the next data */
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
}
}
last_count = 0; /* complete the data */
end_loop:
if( last_count != 0 ) { /* save the internal state */
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, last_blength,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, last_count );
bConverted += last_count;
lastDisp += last_count;
@ -294,31 +277,46 @@ static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
int* freeAfter )
{
dt_desc_t *pData = pConv->pDesc;
char* pDstBuf = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
char* pDstBuf = pConv->pBaseBuf;
char* pSrcBuf = iov[0].iov_base;
int bConverted = 0;
long extent = pData->ub - pData->lb;
int length, i;
long jump, extent = pData->ub - pData->lb;
unsigned int length, remaining, i;
dt_stack_t* stack = &(pConv->pStack[1]);
*out_size = 1;
if( iov[0].iov_base != NULL ) {
if( pData->size == extent ) {
if( (long)pData->size == extent ) {
pDstBuf += pData->true_lb + pConv->bConverted;
length = pConv->count * pData->size - pConv->bConverted;
if( length > iov[0].iov_len )
length = iov[0].iov_len;
/* contiguous data or basic datatype with count */
OMPI_DDT_SAFEGUARD_POINTER( pDstBuf, length,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDstBuf, pSrcBuf, length );
bConverted += length;
} else {
length = iov[0].iov_len;
for( i = 0; i < pConv->count; i++ ) {
MEMCPY( pDstBuf, pSrcBuf, pData->size );
pSrcBuf += pData->size;
pDstBuf += extent;
length -= pData->size;
pDstBuf += stack->disp;
length = pConv->bConverted / pData->size;
length = pConv->bConverted - length * pData->size;
jump = extent - length;
remaining = iov[0].iov_len;
for( i = 0; remaining < length; i++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pDstBuf, length, pConv->pBaseBuf, pData, pConv->count );
MEMCPY( pDstBuf, pSrcBuf, length );
pSrcBuf += length;
pDstBuf += jump;
remaining -= length;
length = pData->size;
jump = extent;
}
bConverted += iov[0].iov_len;
stack->disp = pDstBuf - pConv->pBaseBuf; /* save the position */
}
bConverted += length;
}
iov[0].iov_len = bConverted;
pConv->bConverted += bConverted;
@ -333,7 +331,7 @@ int ompi_convertor_unpack( ompi_convertor_t* pConvertor,
int* freeAfter )
{
dt_desc_t *pData = pConvertor->pDesc;
int rc;
unsigned int length;
*freeAfter = 0;
if( pConvertor->bConverted == (pData->size * pConvertor->count) ) {
@ -344,15 +342,15 @@ int ompi_convertor_unpack( ompi_convertor_t* pConvertor,
if( pConvertor->flags & DT_FLAG_CONTIGUOUS ) {
if( iov[0].iov_base == NULL ) {
rc = pConvertor->count * pData->size - pConvertor->bConverted;
length = pConvertor->count * pData->size - pConvertor->bConverted;
iov[0].iov_base = pConvertor->pBaseBuf + pData->true_lb + pConvertor->bConverted;
if( iov[0].iov_len == 0 ) { /* give me the whole buffer */
} else { /* what about the next chunk ? */
if( iov[0].iov_len < rc )
rc = iov[0].iov_len;
if( iov[0].iov_len < length )
length = iov[0].iov_len;
}
iov[0].iov_len = rc;
pConvertor->bConverted += rc;
iov[0].iov_len = length;
pConvertor->bConverted += length;
return (pConvertor->bConverted == (pData->size * pConvertor->count));
}
}
@ -366,39 +364,38 @@ int ompi_convertor_unpack( ompi_convertor_t* pConvertor,
* and there are less data than the size on the remote host of the
* basic datatype.
*/
#define COPY_TYPE( TYPENAME, TYPE ) \
static int copy_##TYPENAME( unsigned int count, \
char* from, unsigned int from_len, long from_extent, \
char* to, unsigned int to_len, long to_extent, \
int* used ) \
{ \
int i, res = 1; \
unsigned int remote_TYPE_size = sizeof(TYPE); /* TODO */ \
\
if( (remote_TYPE_size * count) > from_len ) { \
count = from_len / remote_TYPE_size; \
if( (count * remote_TYPE_size) != from_len ) { \
DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n", \
from_len - (count * remote_TYPE_size) ); \
res = -1; \
} \
DUMP( "correct: copy %s count %d from buffer %p with length %d to %p space %d\n", \
#TYPE, count, from, from_len, to, to_len ); \
} else \
DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n", \
#TYPE, count, from, from_len, to, to_len ); \
\
if( (from_extent == sizeof(TYPE)) && (to_extent == sizeof(TYPE)) ) { \
MEMCPY( to, from, count * sizeof(TYPE) ); \
} else { \
for( i = 0; i < count; i++ ) { \
MEMCPY( to, from, sizeof(TYPE) ); \
to += to_extent; \
from += from_extent; \
} \
} \
*used = count * sizeof(TYPE) ; \
return res * count; \
/*
 * COPY_TYPE( TYPENAME, TYPE ) expands to a static function named
 * copy_<TYPENAME> that moves elements of C type TYPE from one buffer
 * to another.
 *
 *  count        number of elements requested
 *  from         source buffer
 *  from_len     number of bytes available in the source buffer
 *  from_extent  distance in bytes between two consecutive source elements
 *  to           destination buffer
 *  to_len       space in the destination buffer; only reported through
 *               DUMP, it is not used to limit the copy
 *  to_extent    distance in bytes between two consecutive destination
 *               elements
 *  used         receives count * sizeof(TYPE) for the elements copied
 *
 * When count elements do not fit in from_len bytes, count is reduced to
 * the number of whole elements that do fit; left-over bytes are only
 * reported through DUMP. The function returns the number of elements
 * copied.
 */
#define COPY_TYPE( TYPENAME, TYPE )                                              \
static int copy_##TYPENAME( unsigned int count,                                  \
                            char* from, unsigned int from_len, long from_extent, \
                            char* to, unsigned int to_len, long to_extent,       \
                            int* used )                                          \
{                                                                                \
    unsigned int remote_TYPE_size = sizeof(TYPE); /* TODO */                     \
    unsigned int left;                                                           \
                                                                                 \
    if( (remote_TYPE_size * count) <= from_len ) {                               \
        /* everything requested is available in the source buffer */            \
        DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n", \
              #TYPE, count, from, from_len, to, to_len );                        \
    } else {                                                                     \
        /* keep only the whole elements present in the source buffer */         \
        count = from_len / remote_TYPE_size;                                     \
        if( (count * remote_TYPE_size) != from_len ) {                           \
            DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n", \
                  from_len - (count * remote_TYPE_size) );                       \
        }                                                                        \
        DUMP( "correct: copy %s count %d from buffer %p with length %d to %p space %d\n", \
              #TYPE, count, from, from_len, to, to_len );                        \
    }                                                                            \
                                                                                 \
    if( (from_extent != sizeof(TYPE)) || (to_extent != sizeof(TYPE)) ) {         \
        /* at least one side has gaps: move one element at a time */            \
        for( left = count; left != 0; left-- ) {                                 \
            MEMCPY( to, from, sizeof(TYPE) );                                    \
            to += to_extent;                                                     \
            from += from_extent;                                                 \
        }                                                                        \
    } else {                                                                     \
        /* both sides are packed: a single copy is enough */                    \
        MEMCPY( to, from, count * sizeof(TYPE) );                                \
    }                                                                            \
    *used = count * sizeof(TYPE) ;                                               \
    return count;                                                                \
}
COPY_TYPE( char, char )
@ -406,11 +403,12 @@ COPY_TYPE( short, short )
COPY_TYPE( int, int )
COPY_TYPE( float, float )
COPY_TYPE( long, long )
/*COPY_TYPE( double, double );*/
/*COPY_TYPE( double, double )*/
COPY_TYPE( long_long, long long )
COPY_TYPE( long_double, long double )
COPY_TYPE( complex_float, ompi_complex_float_t )
COPY_TYPE( complex_double, ompi_complex_double_t )
COPY_TYPE( complex_long_double, ompi_complex_long_double_t )
static
int copy_double( unsigned int count,
@ -418,52 +416,78 @@ int copy_double( unsigned int count,
char* to, unsigned int to_len, long to_extent,
int* used )
{
int i, res = 1;
unsigned int remote_double_size = sizeof(double); /* TODO */
if( (remote_double_size * count) > from_len ) {
count = from_len / remote_double_size;
if( (count * remote_double_size) != from_len ) {
DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n",
from_len - (count * remote_double_size) );
res = -1;
}
DUMP( "correct: copy %s count %d from buffer %p with length %d to %p space %d\n",
"double", count, from, from_len, to, to_len );
} else
DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n",
"double", count, from, from_len, to, to_len );
unsigned int i;
unsigned int remote_double_size = sizeof(double); /* TODO */
if( (remote_double_size * count) > from_len ) {
count = from_len / remote_double_size;
if( (count * remote_double_size) != from_len ) {
DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n",
from_len - (count * remote_double_size) );
}
DUMP( "correct: copy %s count %d from buffer %p with length %d to %p space %d\n",
"double", count, from, from_len, to, to_len );
} else
DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n",
"double", count, from, from_len, to, to_len );
if( (from_extent == sizeof(double)) && (to_extent == sizeof(double)) ) {
MEMCPY( to, from, count * sizeof(double) );
} else {
for( i = 0; i < count; i++ ) {
MEMCPY( to, from, sizeof(double) );
to += to_extent;
from += from_extent;
}
}
*used = count * sizeof(double) ;
return res * count;
if( (from_extent == sizeof(double)) && (to_extent == sizeof(double)) ) {
MEMCPY( to, from, count * sizeof(double) );
} else {
for( i = 0; i < count; i++ ) {
MEMCPY( to, from, sizeof(double) );
to += to_extent;
from += from_extent;
}
}
*used = count * sizeof(double) ;
return count;
}
conversion_fct_t ompi_ddt_copy_functions[DT_MAX_PREDEFINED] = {
(conversion_fct_t)NULL, /* DT_LOOP */
(conversion_fct_t)NULL, /* DT_LB */
(conversion_fct_t)NULL, /* DT_UB */
(conversion_fct_t)NULL, /* DT_SPACE */
(conversion_fct_t)copy_char, /* DT_CHAR */
(conversion_fct_t)copy_char, /* DT_BYTE */
(conversion_fct_t)copy_short, /* DT_SHORT */
(conversion_fct_t)copy_int, /* DT_INT */
(conversion_fct_t)copy_float, /* DT_FLOAT */
(conversion_fct_t)copy_long, /* DT_LONG */
(conversion_fct_t)copy_double, /* DT_DOUBLE */
(conversion_fct_t)copy_long_long, /* DT_LONG_LONG */
(conversion_fct_t)copy_long_double, /* DT_LONG_DOUBLE */
(conversion_fct_t)copy_complex_float, /* DT_COMPLEX_FLOAT */
(conversion_fct_t)copy_complex_double, /* DT_COMPLEX_DOUBLE */
(conversion_fct_t)NULL, /* DT_LOOP */
(conversion_fct_t)NULL, /* DT_END_LOOP */
(conversion_fct_t)NULL, /* DT_LB */
(conversion_fct_t)NULL, /* DT_UB */
(conversion_fct_t)copy_char, /* DT_CHAR */
(conversion_fct_t)copy_char, /* DT_CHARACTER */
(conversion_fct_t)copy_char, /* DT_UNSIGNED_CHAR */
(conversion_fct_t)copy_char, /* DT_BYTE */
(conversion_fct_t)copy_short, /* DT_SHORT */
(conversion_fct_t)copy_short, /* DT_UNSIGNED_SHORT */
(conversion_fct_t)copy_int, /* DT_INT */
(conversion_fct_t)copy_int, /* DT_UNSIGNED_INT */
(conversion_fct_t)copy_long, /* DT_LONG */
(conversion_fct_t)copy_long, /* DT_UNSIGNED_LONG */
(conversion_fct_t)copy_long_long, /* DT_LONG_LONG */
(conversion_fct_t)copy_long_long, /* DT_LONG_LONG_INT */
(conversion_fct_t)copy_long_long, /* DT_UNSIGNED_LONG_LONG */
(conversion_fct_t)copy_float, /* DT_FLOAT */
(conversion_fct_t)copy_double, /* DT_DOUBLE */
(conversion_fct_t)copy_long_double, /* DT_LONG_DOUBLE */
(conversion_fct_t)copy_complex_float, /* DT_COMPLEX_FLOAT */
(conversion_fct_t)copy_complex_double, /* DT_COMPLEX_DOUBLE */
(conversion_fct_t)copy_complex_long_double, /* DT_COMPLEX_LONG_DOUBLE */
(conversion_fct_t)NULL, /* DT_PACKED */
(conversion_fct_t)NULL, /* DT_LOGIC */
(conversion_fct_t)NULL, /* DT_FLOAT_INT */
(conversion_fct_t)NULL, /* DT_DOUBLE_INT */
(conversion_fct_t)NULL, /* DT_LONG_DOUBLE_INT */
(conversion_fct_t)NULL, /* DT_LONG_INT */
(conversion_fct_t)NULL, /* DT_2INT */
(conversion_fct_t)NULL, /* DT_SHORT_INT */
(conversion_fct_t)copy_int, /* DT_INTEGER */
(conversion_fct_t)copy_float, /* DT_REAL */
(conversion_fct_t)copy_double, /* DT_DBLPREC */
(conversion_fct_t)NULL, /* DT_2REAL */
(conversion_fct_t)NULL, /* DT_2DBLPREC */
(conversion_fct_t)NULL, /* DT_2INTEGER */
(conversion_fct_t)NULL, /* DT_WCHAR */
(conversion_fct_t)NULL, /* DT_2COMPLEX */
(conversion_fct_t)NULL, /* DT_2DOUBLE_COMPLEX */
(conversion_fct_t)NULL, /* DT_CXX_BOOL */
(conversion_fct_t)NULL, /* DT_UNAVAILABLE */
};
/* Should we supply buffers to the convertor or can we use directly
@ -499,8 +523,8 @@ int ompi_convertor_init_for_recv( ompi_convertor_t* pConv, unsigned int flags,
pConv->pFunctions = ompi_ddt_copy_functions;
pConv->converted = 0;
pConv->bConverted = 0;
pConv->fAdvance = ompi_convertor_unpack_general; /* TODO: just stop complaining */
pConv->fAdvance = ompi_convertor_unpack_homogeneous; /* default behaviour */
pConv->fAdvance = ompi_convertor_unpack_general; /* TODO: just stop complaining */
pConv->memAlloc_fn = allocfn;
/* TODO: work only on homogeneous architectures */
@ -587,7 +611,118 @@ int ompi_ddt_get_element_count( dt_desc_t* pData, int iSize )
}
/**
 * Copy count elements described by the datatype pData from pSrcBuf to
 * pDestBuf. Source and destination use the same datatype, so each piece
 * of data is copied to the same displacement it has in the source.
 *
 * @param pData     description of the datatype to copy
 * @param count     number of elements of that datatype
 * @param pDestBuf  destination buffer
 * @param pSrcBuf   source buffer
 * @return always 0
 */
int ompi_ddt_copy_content_same_ddt( dt_desc_t* pData, int count,
                                    char* pDestBuf, char* pSrcBuf )
{
    dt_stack_t* pStack;   /* pointer to the position on the stack */
    int pos_desc;         /* actual position in the description of the derived datatype */
    int type;             /* type at current position */
    int stack_pos = 0;
    long lastDisp = 0, lastLength = 0;
    dt_elem_desc_t* pElems;

    /* empty data ? then do nothing. This should normally be trapped
     * at a higher level.
     */
    if( count == 0 ) return 0;

    /* If we have to copy a contiguous datatype then simply
     * do a memcpy.
     */
    if( (pData->flags & DT_FLAG_CONTIGUOUS) == DT_FLAG_CONTIGUOUS ) {
        long extent = (pData->ub - pData->lb);

        if( (long)pData->size == extent ) {  /* all contiguous */
            /* Keep the byte count in a long: storing size * count in an
             * int truncates it as soon as the total does not fit in an int.
             */
            long total_length = (long)pData->size * count;

            /* copy by chunks of at most 128KB */
            lastLength = 128 * 1024;
            if( lastLength > total_length ) lastLength = total_length;
            while( total_length > 0 ) {
                OMPI_DDT_SAFEGUARD_POINTER( pDestBuf, lastLength,
                                            pDestBuf, pData, count );
                MEMCPY( pDestBuf, pSrcBuf, lastLength );
                pDestBuf += lastLength;
                pSrcBuf += lastLength;
                total_length -= lastLength;
                if( lastLength > total_length ) lastLength = total_length;
            }
        } else {
            /* each element is one contiguous piece, followed by a gap */
            for( pos_desc = 0; pos_desc < count; pos_desc++ ) {
                OMPI_DDT_SAFEGUARD_POINTER( pDestBuf, pData->size,
                                            pDestBuf, pData, count );
                MEMCPY( pDestBuf, pSrcBuf, pData->size );
                pDestBuf += extent;
                pSrcBuf += extent;
            }
        }
        return 0;
    }

    /* One stack entry for the count plus one for each loop of the datatype.
     * The entries are dt_stack_t structures, so the allocation has to be
     * sized with sizeof(*pStack); sizeof(pStack) is only the size of a pointer.
     */
    pStack = alloca( sizeof(*pStack) * (pData->btypes[DT_LOOP] + 1) );
    pStack->count = count;
    pStack->index = -1;
    pStack->disp = 0;
    pos_desc = 0;

    if( pData->opt_desc.desc != NULL ) {
        pElems = pData->opt_desc.desc;
        pStack->end_loop = pData->opt_desc.used;
    } else {
        pElems = pData->desc.desc;
        pStack->end_loop = pData->desc.used;
    }

    DUMP_STACK( pStack, stack_pos, pElems, "starting" );
    DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
    DUMP( "top stack info {index = %d, count = %d}\n",
          pStack->index, pStack->count );

    while( 1 ) {
        if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
            if( --(pStack->count) == 0 ) { /* end of loop */
                /* The loop is done: drop its entry and continue with the
                 * element following the end marker. Leaving the last entry
                 * means the whole data has been handled.
                 */
                pStack--;
                if( --stack_pos == -1 ) goto end_loop;
            } else {
                /* One more iteration: move the displacement by one extent
                 * and restart right after the beginning of the loop. This is
                 * the same if/else structure ompi_convertor_pack_homogeneous
                 * uses; rewinding after the loop has completed would restart
                 * the enclosing loop without ever decreasing its counter.
                 */
                pos_desc = pStack->index;
                if( pos_desc == -1 )
                    pStack->disp += (pData->ub - pData->lb);
                else
                    pStack->disp += pElems[pos_desc].extent;
            }
            pos_desc++;
        }
        if( pElems[pos_desc].type == DT_LOOP ) {
            do {
                PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
                            pStack->disp, pos_desc + pElems[pos_desc].disp );
                pos_desc++;
            } while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
            DUMP_STACK( pStack, stack_pos, pElems, "advance loops" );
        }
        while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
            /* now here we have a basic datatype */
            type = pElems[pos_desc].type;
            if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
                /* this element starts where the pending region ends: merge them */
                lastLength += pElems[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
            } else {
                /* flush the pending region and start a new one */
                OMPI_DDT_SAFEGUARD_POINTER( pDestBuf + lastDisp, lastLength,
                                            pDestBuf, pData, count );
                MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength );
                lastDisp = pStack->disp + pElems[pos_desc].disp;
                lastLength = pElems[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
            }
            pos_desc++;  /* advance to the next data */
        }
    }
 end_loop:
    /* copy the region that is still pending */
    if( lastLength != 0 ) {
        OMPI_DDT_SAFEGUARD_POINTER( pDestBuf + lastDisp, lastLength,
                                    pDestBuf, pData, count );
        MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength );
    }
    /* cleanup the stack */
    return 0;
}
#if defined(USELESS_CODE)
int ompi_ddt_copy_content_same_ddt2( dt_desc_t* pData, int count,
char* pDestBuf, char* pSrcBuf )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
@ -607,10 +742,14 @@ int ompi_ddt_copy_content_same_ddt( dt_desc_t* pData, int count,
if( (pData->flags & DT_FLAG_CONTIGUOUS) == DT_FLAG_CONTIGUOUS ) {
int extent = (pData->ub - pData->lb);
if( pData->size == extent ) { /* all contiguous */
OMPI_DDT_SAFEGUARD_POINTER( pDestBuf, pData->size * count,
pDestBuf, pData, count );
MEMCPY( pDestBuf, pSrcBuf, pData->size * count );
} else {
for( pos_desc = 0; pos_desc < count; pos_desc++ ) {
memcpy( pDestBuf, pSrcBuf, pData->size );
OMPI_DDT_SAFEGUARD_POINTER( pDestBuf, pData->size,
pDestBuf, pData, count );
MEMCPY( pDestBuf, pSrcBuf, pData->size );
pDestBuf += extent;
pSrcBuf += extent;
}
@ -618,7 +757,7 @@ int ompi_ddt_copy_content_same_ddt( dt_desc_t* pData, int count,
return 0;
}
pStack = alloca( sizeof(pStack) * (pData->btypes[DT_LOOP]+1) );
pStack = alloca( sizeof(pStack) * (pData->btypes[DT_LOOP] + 1) );
pStack->count = count;
pStack->index = -1;
pStack->disp = 0;
@ -667,15 +806,21 @@ int ompi_ddt_copy_content_same_ddt( dt_desc_t* pData, int count,
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
lastLength += pElems[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
} else {
OMPI_DDT_SAFEGUARD_POINTER( pDestBuf + lastDisp, lastLength,
pDestBuf, pData, count );
MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength );
lastDisp = pStack->disp + pElems[pos_desc].disp;
lastLength = pElems[pos_desc].count * ompi_ddt_basicDatatypes[type]->size;
}
pos_desc++; /* advance to the next data */
}
end_loop:
if( lastLength != 0 )
end_loop:
if( lastLength != 0 ) {
OMPI_DDT_SAFEGUARD_POINTER( pDestBuf + lastDisp, lastLength,
pDestBuf, pData, count );
MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength );
}
/* cleanup the stack */
return 0;
}
#endif /* USELESS_CODE */

Просмотреть файл

@ -10,17 +10,11 @@
#endif
#include <stdlib.h>
int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
int starting_point, int* sizes );
int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
int starting_point, int* sizes );
/* Step over every consecutive DT_LOOP descriptor starting at _pElem and
 * return the disp field of the first descriptor that is not a DT_LOOP.
 * No bound is checked here: the walk only stops when a non-DT_LOOP
 * descriptor is reached.
 */
static inline long GET_LOOP_DISP( dt_elem_desc_t* _pElem )
{
    dt_elem_desc_t* cursor = _pElem;

    for( ; DT_LOOP == cursor->type; cursor++ ) {
        /* nothing to do: just skip the loop marker */
    }
    return cursor->disp;
}
int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
int starting_point, int* sizes )
int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
int starting_point, int* sizes )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
@ -38,24 +32,58 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
/* If the convertor continues from the last position,
* there is nothing to do.
*/
if( pConvertor->bConverted == starting_point ) return OMPI_SUCCESS;
if( pConvertor->bConverted == (unsigned long)starting_point ) return OMPI_SUCCESS;
remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] );
pConvertor->stack_pos = 0;
pStack = pConvertor->pStack;
/* Fill the first position on the stack. This one corresponds to the
* last fake DT_END_LOOP that we add to the data representation and
* allows us to move quickly inside the datatype when we have a count.
*/
if( pData->opt_desc.desc != NULL ) {
pElems = pData->opt_desc.desc;
pStack->end_loop = pData->opt_desc.used;
} else {
pElems = pData->desc.desc;
pStack->end_loop = pData->desc.used;
}
loop_length = GET_FIRST_NON_LOOP( pElems );
pStack->disp = pElems[loop_length].disp;
pStack->count = pConvertor->count;
pStack->index = -1;
pStack->end_loop = pData->desc.used;
pStack->disp = 0;
/* Special case for contiguous datatypes */
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
int cnt = starting_point / pData->size;
long extent = pData->ub - pData->lb;
pStack->count -= cnt;
pStack[1].index = 0;
pStack[1].count = starting_point - cnt * pData->size;
pStack[1].end_loop = pStack->end_loop;
if( (long)pData->size == extent ) { /* all elements are contiguous */
pStack[1].disp = pStack->disp + starting_point;
} else { /* each is contiguous but there are gaps inbetween */
pStack[1].disp = pStack->disp /* original place */
+ cnt * extent /* the completed elements with their extent */
+ pStack[1].count; /* what we complete from the last begining of the data */
}
pConvertor->bConverted = starting_point;
pConvertor->stack_pos = 1;
return OMPI_SUCCESS;
}
pos_desc = 0;
remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] );
remoteLength[0] = 0; /* initial value set to ZERO */
pConvertor->stack_pos = 0;
pElems = &(pData->desc.desc[pos_desc]);
next_loop:
totalDisp = pStack->disp;
loop_length = remoteLength[pConvertor->stack_pos];
while( pos_desc >= 0 ) {
if( pElems->type == DT_END_LOOP ) { /* end of the current loop */
dt_endloop_desc_t* end_loop = (dt_endloop_desc_t*)pElems;
/* Now we know the length of the loop. We can compute
* whether the starting position falls in one of the
* iterations of this loop.
@ -63,141 +91,14 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
remoteLength[pConvertor->stack_pos] = loop_length;
if( (loop_length * pStack->count) > resting_place ) {
/* OK here we stop in this loop. First save the loop
* on the stack, then save the position of the last
* data */
* on the stack, then save the position of the last data
*/
int cnt = resting_place / loop_length;
dt_loop_desc_t* loop = (dt_loop_desc_t*)(pElems - end_loop->items - 1);
pStack->count -= cnt;
resting_place -= cnt * loop_length;
pStack->disp += cnt * pElems->extent;
pConvertor->bConverted += (cnt * loop_length);
goto next_loop;
}
/* Not in this loop. Cleanup the stack and advance to the
* next data description.
*/
pConvertor->stack_pos--;
pStack--;
pos_desc++;
pElems++;
goto next_loop;
}
if( pElems->type == DT_LOOP ) {
remoteLength[pConvertor->stack_pos + 1] = 0;
totalDisp = pElems->disp;
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
pData->desc.desc[pos_desc].count,
totalDisp, pos_desc + pElems->disp );
pos_desc++;
pElems++;
loop_length = 0; /* starting a new loop */
goto next_loop;
}
/* now here we have a basic datatype */
type = pElems->type;
lastLength = pElems->count * ompi_ddt_basicDatatypes[type]->size;
if( resting_place > lastLength ) {
resting_place -= lastLength;
loop_length += lastLength;
} else {
int cnt = resting_place / ompi_ddt_basicDatatypes[type]->size;
resting_place -= cnt * ompi_ddt_basicDatatypes[type]->size;
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
pElems->count - cnt,
totalDisp + pElems->disp + cnt * pElems->extent,
pos_desc );
pConvertor->bConverted += (starting_point - resting_place);
PUSH_STACK( pStack, pConvertor->stack_pos, 0, 0, 0, 0 );
return OMPI_SUCCESS;
}
pos_desc++; /* advance to the next data */
pElems++;
}
PUSH_STACK( pStack, pConvertor->stack_pos, 0, 0, 0, 0 );
return OMPI_SUCCESS;
}
/* This function works for homogeneous architectures. As we keep
* track of the size inside the loop in the END_LOOP element
* we can easily jump directly to where we need to be. It works only
* because we can split a basic data element in the middle if we
* have an optimized representation.
*/
int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
int starting_point, int* sizes )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int type, lastLength = 0;
long totalDisp;
ompi_datatype_t* pData = pConvertor->pDesc;
int* remoteLength;
int loop_length;
int resting_place = starting_point;
dt_elem_desc_t* pElems;
if( starting_point == 0 ) {
return ompi_convertor_create_stack_at_begining( pConvertor, sizes );
}
/* If the convertor continues from the last position,
* there is nothing to do.
*/
if( pConvertor->bConverted == starting_point ) return OMPI_SUCCESS;
if( pConvertor->flags & DT_FLAG_CONTIGUOUS ) {
int cnt;
cnt = starting_point / pData->size;
pConvertor->stack_pos = 1;
pConvertor->pStack[0].index = 0;
pConvertor->pStack[0].count = pConvertor->count - cnt;
pConvertor->pStack[0].disp = 0;
/* first here we should select which data representation will be used for
* this operation: normal one or the optimized version ? */
if( pData->opt_desc.used > 0 ) {
pElems = pData->opt_desc.desc;
pConvertor->pStack[0].end_loop = pData->opt_desc.used;
} else {
pElems = pData->desc.desc;
pConvertor->pStack[0].end_loop = pData->desc.used;
}
cnt = starting_point - cnt * pData->size;
pConvertor->pStack[1].index = 0;
pConvertor->pStack[1].count = pElems->count - cnt;
pConvertor->pStack[1].disp = pElems->disp + cnt;
pConvertor->pStack[1].end_loop = pConvertor->pStack[0].end_loop;
pConvertor->bConverted = starting_point;
return OMPI_SUCCESS;
}
remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] );
pStack = pConvertor->pStack;
pStack->count = pConvertor->count;
pStack->index = -1;
pStack->end_loop = pData->desc.used;
pStack->disp = 0;
pos_desc = 0;
remoteLength[0] = 0; /* initial value set to ZERO */
pConvertor->stack_pos = 0;
pElems = &(pData->desc.desc[pos_desc]);
next_loop:
totalDisp = pStack->disp;
loop_length = remoteLength[pConvertor->stack_pos];
while( pos_desc < pStack->end_loop ) {
if( pElems->type == DT_END_LOOP ) { /* end of the current loop */
/* Now we know the length of the loop. We can compute
* whether the starting position falls in one of the
* iterations of this loop.
*/
remoteLength[pConvertor->stack_pos] = loop_length;
if( (loop_length * pStack->count) > resting_place ) {
/* OK here we stop in this loop. First save the loop
* on the stack, then save the position of the last
* data */
int cnt = resting_place / loop_length;
pStack->count -= cnt;
resting_place -= cnt * loop_length;
pStack->disp += cnt * pElems->extent;
pConvertor->bConverted += (cnt * loop_length);
pStack->disp += cnt * loop->extent;
pos_desc -= end_loop->items; /* go back to the first element in the loop */
goto next_loop;
}
/* Not in this loop. Cleanup the stack and advance to the
@ -242,5 +143,7 @@ int ompi_convertor_create_stack_with_pos( ompi_convertor_t* pConvertor,
}
PUSH_STACK( pStack, pConvertor->stack_pos, 0, 0, 0, 0 );
/* Correctly update the bConverted field */
pConvertor->bConverted = starting_point - resting_place;
return OMPI_SUCCESS;
}